From e2b9227d8d6629035041283bcbb57ae52b99884e Mon Sep 17 00:00:00 2001 From: carneiro Date: Tue, 3 May 2011 21:16:37 +0000 Subject: [PATCH] script to test BQSR on good/bad regions git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5743 348d0f76-0448-11de-a6fe-93d51630548a --- .../oneoffs/carneiro/justRecalibrate.scala | 116 ++++++++++++++++++ 1 file changed, 116 insertions(+) create mode 100755 scala/qscript/oneoffs/carneiro/justRecalibrate.scala diff --git a/scala/qscript/oneoffs/carneiro/justRecalibrate.scala b/scala/qscript/oneoffs/carneiro/justRecalibrate.scala new file mode 100755 index 000000000..4867bf0bd --- /dev/null +++ b/scala/qscript/oneoffs/carneiro/justRecalibrate.scala @@ -0,0 +1,116 @@ +package oneoffs.carneiro + +import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk._ + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 4/20/11 + * Time: 16:29 PM + */ + + +class justRecalibrate extends QScript { + + @Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true) + var GATKjar: File = _ + + @Input(doc="input BAM file - or list of BAM files", shortName="i", required=true) + var input: File = _ + + @Input(doc="path to AnalyzeCovariates.jar", fullName="path_to_ac_jar", shortName="ac", required=false) + var ACJar: File = new File("/humgen/gsa-scr1/carneiro/stable/dist/AnalyzeCovariates.jar") + + @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false) + var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R") + + @Input(doc="bad regions interval", shortName="bad", required=false) + var badInterval: File = new File("/humgen/gsa-hpprojects/dev/carneiro/goodbad/data/bad_regions.hg19.intervals") + + @Input(doc="Reference fasta file", shortName="R", required=false) + var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + + @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) + var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") + + val queueLogDir: String = ".qlog/" + + + def script = { + + val first: Boolean = true + val bad: Boolean = true + + val badRecalFile1: File = new File("bad_recal1.csv"); + val badRecalFile2: File = new File("bad_recal2.csv"); + val badBam: File = new File("bad.bam"); + val badPath1: String = "bad1"; + val badPath2: String = "bad2"; + + val goodRecalFile1: File = new File("good_recal1.csv") + val goodRecalFile2: File = new File("good_recal2.csv") + val goodBam: File = new File("good.bam") + val goodPath1: String = "good1" + val goodPath2: String = "good2" + + add(cov(input, badRecalFile1, first, bad), + recal(input, badRecalFile1, badBam, bad), + cov(badBam, badRecalFile2, !first, bad), + analyzeCovariates(badRecalFile1, badPath1), + analyzeCovariates(badRecalFile2, badPath2)) + + add(cov(input, goodRecalFile1, first, !bad), + recal(input, goodRecalFile1, goodBam, !bad), + cov(goodBam, goodRecalFile2, !first, !bad), + analyzeCovariates(goodRecalFile1, goodPath1), + analyzeCovariates(goodRecalFile2, goodPath2)) + } + + trait CommandLineGATKArgs extends CommandLineGATK { + this.jarFile = GATKjar + this.reference_sequence = reference + this.memoryLimit = 4 + this.isIntermediate = true + } + + case class cov (inBam: File, outRecalFile: File, FIRST: Boolean, BAD: Boolean) extends CountCovariates with CommandLineGATKArgs { + this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) + this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") + this.input_file :+= inBam + this.recal_file = outRecalFile + this.useOriginalQualities = FIRST + this.analysisName = queueLogDir + outRecalFile + ".covariates" + this.jobName = queueLogDir + outRecalFile + ".covariates" + if (BAD) { + this.intervals :+= badInterval + this.scatterCount = 85 + } + else + this.excludeIntervals :+= badInterval + } + + case class recal (inBam: File, inRecalFile: File, outBam: File, BAD: Boolean) extends TableRecalibration with CommandLineGATKArgs { + this.input_file :+= inBam + this.recal_file = inRecalFile + this.out = outBam + this.isIntermediate = false + this.analysisName = queueLogDir + outBam + ".recalibration" + this.jobName = queueLogDir + outBam + ".recalibration" + if (BAD) { + this.intervals :+= badInterval + this.scatterCount = 85 + } + else + this.excludeIntervals :+= badInterval + } + + case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates { + this.jarFile = ACJar + this.resources = R + this.recal_file = inRecalFile + this.output_dir = outPath.toString + this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates" + this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" + } +} \ No newline at end of file