From b5b8cb959ae5aeaa1157cc84af904fca3dbb8459 Mon Sep 17 00:00:00 2001 From: carneiro Date: Wed, 18 May 2011 20:07:42 +0000 Subject: [PATCH] Added VQSR to the downsampling script and changed memory limits for the clean script. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5817 348d0f76-0448-11de-a6fe-93d51630548a --- .../oneoffs/carneiro/downsampling.scala | 34 +++++++++++++++++++ .../qscript/oneoffs/carneiro/justClean.scala | 8 ++--- 2 files changed, 38 insertions(+), 4 deletions(-) diff --git a/scala/qscript/oneoffs/carneiro/downsampling.scala b/scala/qscript/oneoffs/carneiro/downsampling.scala index 2b000f94a..741b330f2 100644 --- a/scala/qscript/oneoffs/carneiro/downsampling.scala +++ b/scala/qscript/oneoffs/carneiro/downsampling.scala @@ -3,6 +3,7 @@ package oneoffs.carneiro import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ import scala.io.Source._ + /** * Created by IntelliJ IDEA. * User: carneiro @@ -38,6 +39,12 @@ class downsampling extends QScript { @Input(doc="Reference fasta file", shortName="R", required=false) var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") + @Input(doc="HapMap file", shortName="H", required=false) + var hapmap: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf") + + @Input(doc="Omni file", shortName="O", required=false) + var omni: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf") + @Input(doc="dbSNP file", shortName="D", required=false) var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_132_b37.leftAligned.vcf") @@ -55,6 +62,7 @@ class downsampling extends QScript { val queueLogDir: String = ".qlog/" val outFile: String = "cov.out" val fullCoverageVCF = new 
File("/humgen/gsa-hpprojects/dev/carneiro/downsampling/analysis/fullcov/fullcov.F1.filtered.vcf") + val trancheTarget = "99.0" def script = { val nIntervals = math.min(200, countLines(targetIntervals)) @@ -120,6 +128,32 @@ class downsampling extends QScript { this.jobName = queueLogDir + outFile } + // 3.) Variant Quality Score Recalibration - Generate Recalibration table (HapMap and Omni are bound as training/truth sets, dbSNP as known sites; tranches go to tranchesFiles, the recalibration table to outFile) + case class VQSR(inFile: File, tranchesFiles: File, outFile: File) extends VariantRecalibrator with CommandLineGATKArgs { + this.rodBind :+= RodBind("input", "VCF", inFile) + this.rodBind :+= RodBind("hapmap", "VCF", hapmap, "known=false,training=true,truth=true,prior=15.0") + this.rodBind :+= RodBind("omni", "VCF", omni, "known=false,training=true,truth=true,prior=12.0") + this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP, "known=true,training=false,truth=false,prior=10.0") + this.use_annotation ++= List("QD", "HaplotypeScore", "MQRankSum", "ReadPosRankSum", "HRun") + this.tranches_file = tranchesFiles + this.recal_file = outFile + this.allPoly = true + this.tranche ++= List("100.0", "99.9", "99.5", "99.3", "99.0", "98.9", "98.8", "98.5", "98.4", "98.3", "98.2", "98.1", "98.0", "97.9", "97.8", "97.5", "97.0", "95.0", "90.0") + this.analysisName = outFile + "_VQSR" + this.jobName = queueLogDir + outFile + } + + // 4.) 
Apply the recalibration table to the appropriate tranches (filtering at the trancheTarget sensitivity level) + case class applyVQSR (inFile: File, tranchesFiles: File, outFile: File) extends ApplyRecalibration with CommandLineGATKArgs { + this.rodBind :+= RodBind("input", "VCF", inFile) + this.tranches_file = tranchesFiles + this.recal_file = inFile // NOTE(review): same file as the "input" rod binding above - confirm the recalibration table is really meant to be inFile + this.ts_filter_level = trancheTarget + this.out = outFile + this.analysisName = outFile + "_AVQSR" + this.jobName = queueLogDir + outFile + } + case class eval (inFile: File, outFile: File) extends VariantEval with CommandLineGATKArgs { this.noST = true this.noEV = true diff --git a/scala/qscript/oneoffs/carneiro/justClean.scala b/scala/qscript/oneoffs/carneiro/justClean.scala index 9225d74e7..1d0ba9b6d 100755 --- a/scala/qscript/oneoffs/carneiro/justClean.scala +++ b/scala/qscript/oneoffs/carneiro/justClean.scala @@ -1,6 +1,3 @@ -import org.broadinstitute.sting.queue.QScript -import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner} - /** * Created by IntelliJ IDEA. * User: carneiro @@ -9,6 +6,9 @@ import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, R * To change this template use File | Settings | File Templates. */ +import org.broadinstitute.sting.queue.extensions.gatk.{IndelRealigner, RealignerTargetCreator, RodBind} +import org.broadinstitute.sting.queue.QScript + class justClean extends QScript { @@ -62,7 +62,7 @@ class justClean extends QScript { clean.doNotUseSW = false clean.jobName = queueLogDir + outBam + ".clean" clean.jarFile = GATKjar - clean.memoryLimit = 24 + clean.memoryLimit = 8 clean.scatterCount = 84 add(target, clean);