From 9f03f09cc910b7a5afadfb6b922c9930288bddc0 Mon Sep 17 00:00:00 2001 From: chartl Date: Wed, 1 Dec 2010 18:55:48 +0000 Subject: [PATCH] Changes to V2 pipeline and libraries. AB dropped. Cleaning enabled. Project name now properly propagated to intermediate files (instead of the string repr of the object). Indel mask is now expanded prior to filtering at indels. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4769 348d0f76-0448-11de-a6fe-93d51630548a --- scala/qscript/chartl/fullCallingPipelineV2.q | 8 ++- .../sting/queue/pipeline/VariantCalling.scala | 56 ++++++++++++++----- 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/scala/qscript/chartl/fullCallingPipelineV2.q b/scala/qscript/chartl/fullCallingPipelineV2.q index 79e9cb63d..2c3976d70 100755 --- a/scala/qscript/chartl/fullCallingPipelineV2.q +++ b/scala/qscript/chartl/fullCallingPipelineV2.q @@ -91,7 +91,7 @@ class fullCallingPipelineV2 extends QScript { } if ( !qscript.skip_cleaning ) { - cleaningLib.StandardIndelRealign(bamsToClean,qscript.cleaningJobs) + addAll(cleaningLib.StandardIndelRealign(bamsToClean,qscript.cleaningJobs)) } if (!qscript.skip_cleaning) { @@ -107,7 +107,11 @@ class fullCallingPipelineV2 extends QScript { var handfilt_vcf = new File(base+"_snps.handfiltered.annotated.vcf") var indel_vcf = new File(base+"_indel_calls.vcf") - for ( c <- lib.StandardCallingPipeline(bamFiles,indel_vcf,recal_vcf,handfilt_vcf,qscript.target_titv,qscript.refseqTable) ) { + addAll(lib.StandardCallingPipeline(bamFiles,indel_vcf,recal_vcf,handfilt_vcf,qscript.target_titv,qscript.refseqTable)) + } + + def addAll(clfs: List[CommandLineFunction]) = { + for ( c <- clfs ) { add(c) } } diff --git a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala index 58c017d65..024ecfd3f 100755 --- a/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala +++ b/scala/src/org/broadinstitute/sting/queue/pipeline/VariantCalling.scala @@ -1,16 +1,13 @@ package org.broadinstitute.sting.queue.pipeline - +import org.broadinstitute.sting.commandline._ +import org.broadinstitute.sting.queue.util._ import java.io.File -import net.sf.picard.reference.FastaSequenceFile import org.broadinstitute.sting.datasources.pipeline.Pipeline import org.broadinstitute.sting.gatk.DownsampleType import org.broadinstitute.sting.queue.extensions.gatk._ -import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction -import org.broadinstitute.sting.queue.extensions.samtools._ -import org.broadinstitute.sting.queue.{QException, QScript} -import collection.JavaConversions._ import org.broadinstitute.sting.utils.yaml.YamlUtils -import org.broadinstitute.sting.utils.report.VE2ReportFactory.VE2TemplateType +import org.broadinstitute.sting.queue.function.CommandLineFunction + class VariantCalling(yaml: File,gatkJar: File) { vc => @@ -43,10 +40,18 @@ class VariantCalling(yaml: File,gatkJar: File) { ug.dt = Some(DownsampleType.BY_SAMPLE) ug.scatterCount = 50 - if ( bams.size > 125 ) { + if ( bams.size > 40 ) { ug.memoryLimit = Some(4) } + if ( bams.size > 90 ) { + ug.memoryLimit = Some(6) + } + + if ( bams.size > 140 ) { + ug.memoryLimit = Some(8) + } + return ug } @@ -89,6 +94,10 @@ class VariantCalling(yaml: File,gatkJar: File) { //cv.priority = (igList.foldLeft[List[String]](Nil)( (prLs, ig) => prLs ::: List(swapExt(ig.out,".vcf","").getAbsolutePath))).mkString(",") cv.rodBind = igList.map[RodBind,List[RodBind]](ig => new RodBind(swapExt(ig.out,".vcf","").getName,"VCF",ig.out)) + if ( igList.size > 50 ) { + cv.memoryLimit = Some(4) + } + return cv } @@ -127,8 +136,6 @@ class VariantCalling(yaml: File,gatkJar: File) { hFil.variantVCF = snps hFil.filterExpression :+= "QD<5" hFil.filterName :+= "LowQualByDepth" - hFil.filterExpression :+= "AB>0.75" - hFil.filterName :+= "HighAlleleBalance" hFil.filterExpression :+= "SB>-0.10" hFil.filterName :+= "HighStrandBias" @@ -143,7 +150,6 @@ class VariantCalling(yaml: File,gatkJar: File) { genC.use_annotation :+= "QD" genC.use_annotation :+= "SB" genC.use_annotation :+= "HaplotypeScore" - genC.use_annotation :+= "AB" genC.use_annotation :+= "HRun" return genC @@ -283,15 +289,32 @@ class VariantCalling(yaml: File,gatkJar: File) { return commands } - def StandardCallingPipeline(bams: List[File], indelOut: File, recalOut: File, handFilteredOut: File, targetTiTv: scala.Double, refGene: File = null ) : List[CommandLineGATK] = { - var commands : List[CommandLineGATK] = Nil + class VCF2Mask extends CommandLineFunction { + @Input(doc="the indel vcf") var indel_vcf : File = _ + @Argument(doc="the window size") var win_size : Int = 2 + @Output(doc="the mask bed") var out_mask : File = _ + + def commandLine = { "grep PASS %s | awk '{print $1,$2-%d,$2+%d}' > %s".format(indel_vcf.getAbsolutePath,win_size,win_size,out_mask.getAbsolutePath) } + } + + def IndelVCF2Mask(vcf: File, size: Int) : VCF2Mask = { + var masker: VCF2Mask = new VCF2Mask() + masker.indel_vcf = vcf + masker.win_size = size + masker.out_mask = swapExt(vcf,".vcf",".indel_mask.bed") + + return masker + } + + def StandardCallingPipeline(bams: List[File], indelOut: File, recalOut: File, handFilteredOut: File, targetTiTv: scala.Double, refGene: File = null ) : List[CommandLineFunction] = { + var commands : List[CommandLineFunction] = Nil var dir = "" if ( recalOut.getParent != null ) { dir = recalOut.getParent+"/" } - var raw_snp = new File(dir+vc.attributes.getProject+".raw_snps.vcf") + var raw_snp = new File(dir+vc.attributes.getProject.getName+".raw_snps.vcf") var ug = StandardUnifiedGenotyper(bams, raw_snp) commands :+= ug @@ -299,10 +322,13 @@ class VariantCalling(yaml: File,gatkJar: File) { var raw_indel = indelOut var ig = StandardIndelCalls(bams,raw_indel) + var indel_mask : VCF2Mask = IndelVCF2Mask(indelOut,5) + commands ++= ig + commands :+= indel_mask var prefilt_snp = swapExt(raw_snp,".vcf",".indel_filtered.vcf") - var iFilt = StandardFilterAtIndels(raw_snp,raw_indel,prefilt_snp) + var iFilt = StandardFilterAtIndels(raw_snp,indel_mask.out_mask,prefilt_snp) commands :+= iFilt