Now accepts any number of VCFs to evaluate. Runs the standard (now three) variant eval commands and invokes the exomeQC R script. Has some annoying assumptions about paths encoded right now. Example usage below:

setenv DATA ~/Desktop/broadLocal/localData/
java -Djava.io.tmpdir=tmp -jar ../dist/Queue.jar -S ../scala/qscript/oneoffs/depristo/ExomePostQCEval.scala --gatkjarfile ../dist/GenomeAnalysisTK.jar -R $DATA/human_g1k_v37.fasta $* -eval $DATA/ESPGO_Gabriel_NHLBI_eomi_june_2011_batch1.vcf -intervals ~/Desktop/broadLocal/localData/whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list -dbSNP ~/Desktop/broadLocal/localData/dbsnp_132_b37.vcf -eval $DATA/ESPGO_Gabriel_NHLBI_eomi_june_2011_batch2.vcf

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6004 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2011-06-16 12:49:54 +00:00
parent 9254faa27e
commit 38d7733989
1 changed files with 30 additions and 16 deletions

View File

@ -12,12 +12,16 @@ class ExomePostQCEval extends QScript {
@Argument(shortName = "R", doc="ref", required=false)
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
@Argument(shortName = "eval", doc="ref", required=true)
var evalVCF: File = _
// todo -- should accept argument list as well
@Argument(shortName = "eval", doc="VCFs to evaluate", required=true)
var evalVCFs: List[File]= _
@Argument(shortName = "intervals", doc="intervals", required=true)
val myIntervals: String = null;
@Argument(shortName = "RPath", doc="RPath", required=false)
var RPath: File = new File("../R")
@Argument(shortName = "dbSNP", doc="dbSNP", required=false)
val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/bundle/current/b37/dbsnp_132.b37.vcf")
@ -31,23 +35,27 @@ class ExomePostQCEval extends QScript {
// TODO -- should include "standard" eval for plotting expectations
def script = {
// The basic summary eval
createEval(".summary",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "FunctionalClass"))
for ( evalVCF <- evalVCFs ) {
// The basic summary eval
createEval(evalVCF, ".summary",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "FunctionalClass"))
// The basic summary eval, by AF
createEval(".byAF",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "AlleleFrequency", "FunctionalClass"))
// The basic summary eval, by AF
createEval(evalVCF, ".byAF",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "AlleleFrequency", "FunctionalClass"))
// By sample
createEval(".bySample",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "Sample"))
// By sample
createEval(evalVCF, ".bySample",
List("TiTvVariantEvaluator", "CountVariants", "CompOverlap"),
List("EvalRod", "CompRod", "Novelty", "Sample"))
add(new ExomeQCRScript(evalVCF))
}
}
def createEval(prefix: String, evalModules: List[String], extraStrats: List[String]) {
def createEval(evalVCF: File, prefix: String, evalModules: List[String], extraStrats: List[String]) {
val eval = new Eval(evalVCF)
eval.out = swapExt(evalVCF,".vcf", prefix + ".eval")
eval.evalModule = evalModules
@ -56,12 +64,18 @@ class ExomePostQCEval extends QScript {
}
class Eval(@Input vcf: File) extends VariantEval with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("eval", "VCF", evalVCF)
this.rodBind :+= RodBind("eval", "VCF", vcf)
if ( dbSNP.exists() )
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.doNotUseAllStandardStratifications = true
this.doNotUseAllStandardModules = true
this.intervalsString = List(myIntervals);
}
class ExomeQCRScript(vcf: File) extends CommandLineFunction {
@Output var pdf: File = swapExt(vcf,".vcf", ".pdf")
val root = swapExt(vcf,".vcf", "") // remove the prefix
def commandLine = "Rscript %s/exomeQC.R %s %s %s".format(RPath, root, root, pdf)
}
}