From c3f70cc5cb417045f27c6c6e27f6e0678d4eb18b Mon Sep 17 00:00:00 2001 From: carneiro Date: Tue, 29 Mar 2011 18:22:09 +0000 Subject: [PATCH] DPP: Updated after some tests with BWA. Still needs more testing. MDP: Removed ApplyVariantCut as it's no longer necessary with VQSR2. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5534 348d0f76-0448-11de-a6fe-93d51630548a --- .../MethodsDevelopmentCallingPipeline.scala | 24 +------------------ .../oneoffs/carneiro/dataProcessingV2.scala | 22 ++++++++--------- 2 files changed, 12 insertions(+), 34 deletions(-) diff --git a/scala/qscript/core/MethodsDevelopmentCallingPipeline.scala b/scala/qscript/core/MethodsDevelopmentCallingPipeline.scala index 84b7ba8ea..aa044aa28 100755 --- a/scala/qscript/core/MethodsDevelopmentCallingPipeline.scala +++ b/scala/qscript/core/MethodsDevelopmentCallingPipeline.scala @@ -38,9 +38,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { @Argument(shortName="eval", doc="adds the VariantEval walker to the pipeline", required=false) var eval: Boolean = false - @Argument(shortName="noCut", doc="removes the ApplyVariantCut walker from the pipeline", required=false) - var noCut: Boolean = false - @Argument(shortName="indels", doc="calls indels with the Unified Genotyper", required=false) var callIndels: Boolean = false @@ -81,7 +78,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { val goldStandardRecalibratedVCF = new File(name + "goldStandard.recalibrated.vcf") val goldStandardTranchesFile = new File(name + "goldStandard.tranches") val goldStandardRecalFile = new File(name + "goldStandard.tranches.recal") - val cutVCF = new File(name + ".cut.vcf") val evalFile = new File(name + ".snp.eval") val evalIndelFile = new File(name + ".indel.eval") val goldStandardName = qscript.outputDir + "goldStandard/" + baseName @@ -171,7 +167,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { add(new snpCall(target)) add(new VQSR(target, !goldStandard)) add(new applyVQSR(target, !goldStandard)) - if (!noCut) add(new VariantCut(target)) if (eval) add(new snpEvaluation(target)) } if ( !skipGoldStandard ) { @@ -285,23 +280,6 @@ class MethodsDevelopmentCallingPipeline extends QScript { } - - // 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche - class VariantCut(t: Target) extends ApplyVariantCuts with UNIVERSAL_GATK_ARGS { - this.reference_sequence = t.reference - this.rodBind :+= RodBind("input", "VCF", t.recalibratedVCF ) - this.intervalsString ++= List(t.intervals) - this.out = t.cutVCF - this.tranchesFile = t.tranchesFile - this.fdr_filter_level = t.trancheTarget - if (t.dbsnpFile.endsWith(".rod")) - this.DBSNP = new File(t.dbsnpFile) - else if (t.dbsnpFile.endsWith(".vcf")) - this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile) - this.analysisName = t.name + "_VC" - this.jobName = queueLogDir + t.name + ".cut" - } - // 6.) Variant Evaluation Base(OPTIONAL) class EvalBase(t: Target) extends VariantEval with UNIVERSAL_GATK_ARGS { this.reference_sequence = t.reference @@ -317,7 +295,7 @@ class MethodsDevelopmentCallingPipeline extends QScript { // 6a.) SNP Evaluation (OPTIONAL) based on the cut vcf class snpEvaluation(t: Target) extends EvalBase(t) { if (t.reference == b37 || t.reference == hg19) this.rodBind :+= RodBind("compomni", "VCF", omni_b37) - this.rodBind :+= RodBind("eval", "VCF", if (useCut) {t.cutVCF} else {t.recalibratedVCF} ) + this.rodBind :+= RodBind("eval", "VCF", t.recalibratedVCF ) this.out = t.evalFile this.analysisName = t.name + "_VEs" this.jobName = queueLogDir + t.name + ".snp.eval" diff --git a/scala/qscript/oneoffs/carneiro/dataProcessingV2.scala b/scala/qscript/oneoffs/carneiro/dataProcessingV2.scala index 783fddcce..b4025f154 100755 --- a/scala/qscript/oneoffs/carneiro/dataProcessingV2.scala +++ b/scala/qscript/oneoffs/carneiro/dataProcessingV2.scala @@ -162,16 +162,16 @@ class dataProcessingV2 extends QScript { // Takes a list of processed BAM files, revert them to unprocessed and realigns each lane, producing a list of // per-lane aligned bam files, ready to be processed. - def performAlignment(bamFiles: List[File]): List[File] = { + def performAlignment(bamList: File): List[File] = { return List() } def createListFromFile(in: File):List[File] = { if (in.toString.endsWith("bam")) return List(in) - val l: List[File] = List() + var l: List[File] = List() for (bam <- fromFile(in).getLines) - l :+= bam + l :+= new File(bam) return l } @@ -185,7 +185,7 @@ class dataProcessingV2 extends QScript { def script = { //todo -- (option - BWA) run BWA on each bam file (per lane bam file) before performing per sample processing - val perLaneAlignedBamFiles: List[File] = if (useBWApe || useBWAse) {performAlignment(input)} else {createListFromFile(input)} + val perLaneAlignedBamFiles: List[File] = if (useBWApe || useBWAse) { performAlignment(input) } else { createListFromFile(input) } // Generate a BAM file per sample joining all per lane files if necessary val sampleBamFiles = createSampleFiles(perLaneAlignedBamFiles) @@ -360,7 +360,7 @@ class dataProcessingV2 extends QScript { } case class sortSam (inSam: File, outBam: File) extends PicardBamFunction { - @Input(doc="input unsorted sam file") var sam = inBams + @Input(doc="input unsorted sam file") var sam = inSam @Output(doc="sorted bam") var bam = outBam @Output(doc="sorted bam index") var bamIndex = new File(outBam + "bai") override def inputBams = List(sam) @@ -403,8 +403,8 @@ class dataProcessingV2 extends QScript { @Output(doc="output sai file") var sai = outSai def commandLine = bwaPath + " aln -q 5 " + reference + " -b " + bam + " > " + sai this.isIntermediate = true - this.analysisName = queueLogDir + outBam + ".bwa_aln_se" - this.jobName = queueLogDir + outBam + ".bwa_aln_se" + this.analysisName = queueLogDir + outSai + ".bwa_aln_se" + this.jobName = queueLogDir + outSai + ".bwa_aln_se" } case class bwa_aln_pe1 (inBam: File, outSai1: File) extends CommandLineFunction { @@ -412,8 +412,8 @@ class dataProcessingV2 extends QScript { @Output(doc="output sai file for 1st mating pair") var sai = outSai1 def commandLine = bwaPath + " aln -q 5 " + reference + " -b1 " + bam + " > " + sai this.isIntermediate = true - this.analysisName = queueLogDir + outBam + ".bwa_aln_pe1" - this.jobName = queueLogDir + outBam + ".bwa_aln_pe1" + this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" + this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } case class bwa_aln_pe2 (inBam: File, outSai2: File) extends CommandLineFunction { @@ -421,8 +421,8 @@ class dataProcessingV2 extends QScript { @Output(doc="output sai file for 2nd mating pair") var sai = outSai2 def commandLine = bwaPath + " aln -q 5 " + reference + " -b2 " + bam + " > " + sai this.isIntermediate = true - this.analysisName = queueLogDir + outBam + ".bwa_aln_pe2" - this.jobName = queueLogDir + outBam + ".bwa_aln_pe2" + this.analysisName = queueLogDir + outSai2 + ".bwa_aln_pe2" + this.jobName = queueLogDir + outSai2 + ".bwa_aln_pe2" } case class bwa_sam_se (inBam: File, inSai: File, outBam: File, readGroup: String) extends CommandLineFunction {