From 7bc204903154930aecb355c6e99460bdbd0e5e10 Mon Sep 17 00:00:00 2001 From: chartl Date: Mon, 13 Dec 2010 04:55:13 +0000 Subject: [PATCH] Updates and bug fixes to private mutations qscript and pipeline libraries. Hand filter strings are now not busted (boo to having to escape quotes); convenience method added to VariantCalling to propagate standard trait data to a given GATK command line -- should be made more scala-esque in the future. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4824 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/chartl/private_mutations.q | 29 ++++++++++++++++++- .../queue/pipeline/ProjectManagement.scala | 2 +- .../sting/queue/pipeline/VariantCalling.scala | 19 ++++++++++-- 3 files changed, 46 insertions(+), 4 deletions(-) diff --git a/scala/qscript/chartl/private_mutations.q b/scala/qscript/chartl/private_mutations.q index d4fe3bd70..0e92c341f 100755 --- a/scala/qscript/chartl/private_mutations.q +++ b/scala/qscript/chartl/private_mutations.q @@ -1,7 +1,7 @@ import collection.JavaConversions._ import java.io.FileNotFoundException import org.broadinstitute.sting.datasources.pipeline._ -import org.broadinstitute.sting.queue.extensions.gatk.{VariantFiltration, UnifiedGenotyper} +import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.library.clf.vcf._ import org.broadinstitute.sting.queue.pipeline._ import org.broadinstitute.sting.queue.QScript @@ -52,6 +52,33 @@ class private_mutations extends QScript { add(extract_afr) add(extract_eur) + + var eval_all : VariantEval = vcLib.addTrait(new VariantEval) + eval_all.rodBind :+= new RodBind("evalEOMI","vcf",finalMergedVCF) + eval_all.noStandard = true + eval_all.E :+= "PrivatePermutations" + eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv") + eval_all.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV) + + add(eval_all) + + var eval_afr : VariantEval = vcLib.addTrait(new VariantEval) + eval_afr.rodBind :+= new RodBind("evalAFR","VCF",extract_afr.outputVCF) + eval_afr.rodBind :+= new RodBind("compEUR","VCF",extract_eur.outputVCF) + eval_afr.E :+= "PrivatePermutations" + eval_afr.out = swapExt(extract_afr.outputVCF,".vcf",".perm.csv") + eval_afr.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV) + + add(eval_afr) + + var eval_eur : VariantEval = vcLib.addTrait(new VariantEval) + eval_eur.rodBind :+= new RodBind("compAFR","VCF",extract_afr.outputVCF) + eval_eur.rodBind :+= new RodBind("evalEUR","VCF",extract_eur.outputVCF) + eval_eur.E :+= "PrivatePermutations" + eval_eur.out = swapExt(extract_eur.outputVCF,".vcf",".perm.csv") + eval_eur.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV) + + add(eval_eur) } } \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala index 3e6379149..a1ed558e4 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/ProjectManagement.scala @@ -25,7 +25,7 @@ class ProjectManagement(stingPath: String) { @Argument(doc="Path to the reference file on disk") var ref: File = _ def commandLine = { - "egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format( + "egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $1\"\\t\"$2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format( vcf_files(0).getAbsolutePath, out_vcf.getAbsolutePath, vcf_files.foldLeft[String]("")( (b,a) => b + " " + a.getAbsolutePath), sortByRef, ref.getAbsolutePath, out_vcf.getAbsolutePath ) } diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala index 3983e3538..2505b62d8 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala @@ -28,6 +28,21 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) { this.jarFile = vc.gatkJar } + /** + * @Doc: Adds the trait data to a command line gatk that is passed in + * @Return: the input CLGATK with the SCLGATK data propagated into it + * @TODO: This should be better written, it'd be nice just to call it with addTrait[T], and return a T with SCLGATK + */ + def addTrait[T <: CommandLineGATK](c : T) : T = { + c.reference_sequence = vc.attributes.getProject.getReferenceFile + c.intervals = List(vc.attributes.getProject.getIntervalList) + c.DBSNP = vc.attributes.getProject.getDbsnpFile + // set global memory limit on the low side. Additional input bams will affect it. + c.memoryLimit = Some(2) + c.jarFile = vc.gatkJar + c + } + /** * @Doc: Creates a standard UnifiedGenotyper CLF from input bams and an output file * @Return: UnifiedGenotyper with the standard GSA arguments @@ -136,9 +151,9 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) { hFil.analysisName = "HandFilter" hFil.out = output hFil.variantVCF = snps - hFil.filterExpression :+= "QD<5" + hFil.filterExpression :+= "\"QD<5.0\"" hFil.filterName :+= "LowQualByDepth" - hFil.filterExpression :+= "SB>-0.10" + hFil.filterExpression :+= "\"SB>-0.10\"" hFil.filterName :+= "HighStrandBias" return hFil