Updates and bug fixes to private mutations qscript and pipeline libraries. Hand filter strings are now not busted (boo to having to escape quotes); convenience method added to VariantCalling to propagate standard trait data to a given GATK command line -- should be made more scala-esque in the future.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4824 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cf75caf653
commit
7bc2049031
|
|
@ -1,7 +1,7 @@
|
|||
import collection.JavaConversions._
|
||||
import java.io.FileNotFoundException
|
||||
import org.broadinstitute.sting.datasources.pipeline._
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.{VariantFiltration, UnifiedGenotyper}
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.library.clf.vcf._
|
||||
import org.broadinstitute.sting.queue.pipeline._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
|
@ -52,6 +52,33 @@ class private_mutations extends QScript {
|
|||
|
||||
add(extract_afr)
|
||||
add(extract_eur)
|
||||
|
||||
var eval_all : VariantEval = vcLib.addTrait(new VariantEval)
|
||||
eval_all.rodBind :+= new RodBind("evalEOMI","vcf",finalMergedVCF)
|
||||
eval_all.noStandard = true
|
||||
eval_all.E :+= "PrivatePermutations"
|
||||
eval_all.out = swapExt(finalMergedVCF,".vcf",".perm.csv")
|
||||
eval_all.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
|
||||
|
||||
add(eval_all)
|
||||
|
||||
var eval_afr : VariantEval = vcLib.addTrait(new VariantEval)
|
||||
eval_afr.rodBind :+= new RodBind("evalAFR","VCF",extract_afr.outputVCF)
|
||||
eval_afr.rodBind :+= new RodBind("compEUR","VCF",extract_eur.outputVCF)
|
||||
eval_afr.E :+= "PrivatePermutations"
|
||||
eval_afr.out = swapExt(extract_afr.outputVCF,".vcf",".perm.csv")
|
||||
eval_afr.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
|
||||
|
||||
add(eval_afr)
|
||||
|
||||
var eval_eur : VariantEval = vcLib.addTrait(new VariantEval)
|
||||
eval_eur.rodBind :+= new RodBind("compAFR","VCF",extract_afr.outputVCF)
|
||||
eval_eur.rodBind :+= new RodBind("evalEUR","VCF",extract_eur.outputVCF)
|
||||
eval_eur.E :+= "PrivatePermutations"
|
||||
eval_eur.out = swapExt(extract_eur.outputVCF,".vcf",".perm.csv")
|
||||
eval_eur.reportType = Some(org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType.CSV)
|
||||
|
||||
add(eval_eur)
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -25,7 +25,7 @@ class ProjectManagement(stingPath: String) {
|
|||
@Argument(doc="Path to the reference file on disk") var ref: File = _
|
||||
|
||||
def commandLine = {
|
||||
"egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format(
|
||||
"egrep \"FORMAT|format\" %s | cut -f1-8 > %s ; grep PASS %s | tr ':' '\\t' | awk '{print $1\"\\t\"$2\"\\t\"$3\"\\t\"$4\"\\t\"$5\"\\t\"$6\"\\t.\\t.\\t.\"}' | sort -n -k2,2 | uniq | perl %s - %s.fai >> %s".format(
|
||||
vcf_files(0).getAbsolutePath, out_vcf.getAbsolutePath, vcf_files.foldLeft[String]("")( (b,a) => b + " " + a.getAbsolutePath), sortByRef, ref.getAbsolutePath, out_vcf.getAbsolutePath
|
||||
)
|
||||
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,21 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) {
|
|||
this.jarFile = vc.gatkJar
|
||||
}
|
||||
|
||||
/**
|
||||
* @Doc: Adds the trait data to a command line gatk that is passed in
|
||||
* @Return: the input CLGATK with the SCLGATK data propagated into it
|
||||
* @TODO: This should be better written, it'd be nice just to call it with addTrait[T], and return a T with SCLGATK
|
||||
*/
|
||||
def addTrait[T <: CommandLineGATK](c : T) : T = {
|
||||
c.reference_sequence = vc.attributes.getProject.getReferenceFile
|
||||
c.intervals = List(vc.attributes.getProject.getIntervalList)
|
||||
c.DBSNP = vc.attributes.getProject.getDbsnpFile
|
||||
// set global memory limit on the low side. Additional input bams will affect it.
|
||||
c.memoryLimit = Some(2)
|
||||
c.jarFile = vc.gatkJar
|
||||
c
|
||||
}
|
||||
|
||||
/**
|
||||
* @Doc: Creates a standard UnifiedGenotyper CLF from input bams and an output file
|
||||
* @Return: UnifiedGenotyper with the standard GSA arguments
|
||||
|
|
@ -136,9 +151,9 @@ class VariantCalling(attribs: Pipeline,gatkJar: File) {
|
|||
hFil.analysisName = "HandFilter"
|
||||
hFil.out = output
|
||||
hFil.variantVCF = snps
|
||||
hFil.filterExpression :+= "QD<5"
|
||||
hFil.filterExpression :+= "\"QD<5.0\""
|
||||
hFil.filterName :+= "LowQualByDepth"
|
||||
hFil.filterExpression :+= "SB>-0.10"
|
||||
hFil.filterExpression :+= "\"SB>-0.10\""
|
||||
hFil.filterName :+= "HighStrandBias"
|
||||
|
||||
return hFil
|
||||
|
|
|
|||
Loading…
Reference in New Issue