Updates and bug fixes to the private mutations QScript and the pipeline libraries. Hand-filter expression strings are no longer broken (the quotes have to be escaped, unfortunately). Added a convenience method to VariantCalling that propagates the standard trait data to a given GATK command line; it should be made more Scala-esque in the future.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4824 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-12-13 04:55:13 +00:00
parent cf75caf653
commit 7bc2049031
3 changed files with 46 additions and 4 deletions

View File

@ -1,7 +1,7 @@
import collection.JavaConversions._
import java.io.FileNotFoundException
import org.broadinstitute.sting.datasources.pipeline._
import org.broadinstitute.sting.queue.extensions.gatk.{VariantFiltration, UnifiedGenotyper}
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.library.clf.vcf._
import org.broadinstitute.sting.queue.pipeline._
import org.broadinstitute.sting.queue.QScript
@ -52,6 +52,33 @@ class private_mutations extends QScript {
add(extract_afr)
add(extract_eur)
var eval_all : VariantEval = vcLib.addTrait(new VariantEval)
eval_all.rodBind :+= new RodBind("evalEOMI","vcf",finalMergedVCF)
eval_all.noStandard = true
eval_all.E :+= "PrivatePermutations"
eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
eval_all.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
add(eval_all)
var eval_afr : VariantEval = vcLib.addTrait(new VariantEval)
eval_afr.rodBind :+= new RodBind("evalAFR","VCF",extract_afr.outputVCF)
eval_afr.rodBind :+= new RodBind("compEUR","VCF",extract_eur.outputVCF)
eval_afr.E :+= "PrivatePermutations"
eval_afr.out = swapExt(extract_afr.outputVCF,".vcf",".perm.csv")
eval_afr.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
add(eval_afr)
var eval_eur : VariantEval = vcLib.addTrait(new VariantEval)
eval_eur.rodBind :+= new RodBind("compAFR","VCF",extract_afr.outputVCF)
eval_eur.rodBind :+= new RodBind("evalEUR","VCF",extract_eur.outputVCF)
eval_eur.E :+= "PrivatePermutations"
eval_eur.out = swapExt(extract_eur.outputVCF,".vcf",".perm.csv")
eval_eur.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
add(eval_eur)
}
}

View File

@ -25,7 +25,7 @@ class ProjectManagement(stingPath: String) {
@Argument(doc="Path to the reference file on disk") var ref: File = _
def commandLine = {
"egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format(
"egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $1\"\\t\"$2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format(
vcf_files(0).getAbsolutePath, out_vcf.getAbsolutePath, vcf_files.foldLeft[String]("")( (b,a) => b + " " + a.getAbsolutePath), sortByRef, ref.getAbsolutePath, out_vcf.getAbsolutePath
)
}

View File

@ -28,6 +28,21 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) {
this.jarFile = vc.gatkJar
}
/**
* @Doc: Adds the trait data to a command line gatk that is passed in
* @Return: the input CLGATK with the SCLGATK data propagated into it
* @TODO: This should be better written, it'd be nice just to call it with addTrait[T], and return a T with SCLGATK
*/
def addTrait[T <: CommandLineGATK](c : T) : T = {
  // Fills in the standard settings on the supplied command line and hands the
  // same instance back: reference sequence, interval list and dbSNP file come
  // from the pipeline's project, the jar comes from vc.gatkJar.
  val project = vc.attributes.getProject

  c.jarFile = vc.gatkJar
  // Keep the global memory limit on the low side; additional input bams will affect it.
  c.memoryLimit = Some(2)

  c.reference_sequence = project.getReferenceFile
  c.intervals = List(project.getIntervalList)
  c.DBSNP = project.getDbsnpFile

  c
}
/**
* @Doc: Creates a standard UnifiedGenotyper CLF from input bams and an output file
* @Return: UnifiedGenotyper with the standard GSA arguments
@ -136,9 +151,9 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) {
hFil.analysisName = "HandFilter"
hFil.out = output
hFil.variantVCF = snps
hFil.filterExpression :+= "QD<5"
hFil.filterExpression :+= "\"QD<5.0\""
hFil.filterName :+= "LowQualByDepth"
hFil.filterExpression :+= "SB>-0.10"
hFil.filterExpression :+= "\"SB>-0.10\""
hFil.filterName :+= "HighStrandBias"
return hFil