diff --git a/scala/qscript/oneoffs/carneiro/dataProcessing.scala b/scala/qscript/oneoffs/carneiro/dataProcessing.scala index c9d03d5c3..4307118f6 100755 --- a/scala/qscript/oneoffs/carneiro/dataProcessing.scala +++ b/scala/qscript/oneoffs/carneiro/dataProcessing.scala @@ -207,8 +207,8 @@ class dataProcessing extends QScript { this.jobName = queueLogDir + outBamList + ".bamList" } - class joinBams(inBams: File, outBam: String) extends PrintReads { - this.input_file :+= inBams + class joinBams(inBams: String, outBam: String) extends PrintReads { + this.input_file :+= new File(inBams) this.out = new File(outBam) this.jobName = queueLogDir + inBams + ".joinBams" } diff --git a/scala/qscript/oneoffs/carneiro/justClean.scala b/scala/qscript/oneoffs/carneiro/justClean.scala index d453f619b..9b9781a71 100755 --- a/scala/qscript/oneoffs/carneiro/justClean.scala +++ b/scala/qscript/oneoffs/carneiro/justClean.scala @@ -1,5 +1,7 @@ -import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner} import org.broadinstitute.sting.queue.QScript +import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner} +import org.broadinstitute.sting.commandline.ArgumentSource +import org.broadinstitute.sting.queue.extensions.gatk.BamGatherFunction /** * Created by IntelliJ IDEA. @@ -21,7 +23,6 @@ class justClean extends QScript { @Input(doc="Reference fasta file", shortName="R", required=false) var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta") - @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") @@ -34,8 +35,10 @@ class justClean extends QScript { def script = { + println(GATKjar) + val outBam = swapExt(input, ".bam", ".Qclean.bam") - val tIntervals = swapExt(input, ".bam", ".Qall_indels.intervals") + val tIntervals = swapExt(input, ".bam", ".all_indels.intervals") val target = new RealignerTargetCreator() target.input_file :+= input @@ -49,6 +52,8 @@ class justClean extends QScript { target.jarFile = GATKjar target.scatterCount = 84 + + val clean = new IndelRealigner() clean.input_file :+= input clean.targetIntervals = tIntervals @@ -64,6 +69,13 @@ class justClean extends QScript { clean.memoryLimit = Some(6) clean.scatterCount = 84 - add(target, clean); + clean.setupGatherFunction = { + case (gather: BamGatherFunction, source: ArgumentSource) => + gather.memoryLimit = Some(6) // Memory limit you expect for the job + gather.jarFile = new File("/seq/software/picard/current/bin/MergeSamFiles.jar") + } + + + add(clean); } } diff --git a/scala/qscript/oneoffs/carneiro/pbCalling.scala b/scala/qscript/oneoffs/carneiro/pbCalling.scala index 31e525b55..506a24e3f 100755 --- a/scala/qscript/oneoffs/carneiro/pbCalling.scala +++ b/scala/qscript/oneoffs/carneiro/pbCalling.scala @@ -1,4 +1,3 @@ -import org.broadinstitute.sting.gatk.CommandLineGATK import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript @@ -34,8 +33,8 @@ class pbCalling extends QScript { val filteredVCF = new File(name + ".filtered.vcf") val titvRecalibratedVCF = new File(name + ".titv.recalibrated.vcf") val titvTranchesFile = new File(name + ".titv.tranches") - val tsRecalibratedVCF = new File(name + ".ts.recalibrated.vcf") - val tsTranchesFile = new File(name + ".ts.tranches") + val recalibratedVCF = new File(name + ".ts.recalibrated.vcf") + val tranchesFile = new File(name + ".ts.tranches") val cutVCF = new File(name + ".cut.vcf") val evalFile = new File(name + ".eval") val goldStandardName = qscript.outputDir + "goldStandard/" + baseName @@ -142,9 +141,8 @@ class pbCalling extends QScript { for (target <- targets) { add(new UnifiedGenotyper(target)) add(new VariantFiltration(target)) - add(new GenerateVariantClusters(target, !goldStandard)) -// add(new VariantRecalibratorTiTv(target, !goldStandard)) - add(new VariantRecalibratorNRS(target, !goldStandard)) + add(new VQSR(target, !goldStandard)) + add(new applyVQSR(target, !goldStandard)) add(new VariantCut(target)) add(new VariantEvaluation(target)) } @@ -189,83 +187,45 @@ class pbCalling extends QScript { this.analysisName = t.name + "_VF" } - // 3.) VQSR part1 Generate Gaussian clusters based on truth sites - class GenerateVariantClusters(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.GenerateVariantClusters { - this.jarFile = gatkJarFile - val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name } - this.reference_sequence = t.reference - this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile) - if( t.hapmapFile.contains("b37") ) - this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf") - this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } ) - this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile } - if (t.isCCS) - this.use_annotation ++= List("QD", "HaplotypeScore", "HRun") - else - this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun") - this.analysisName = name + "_GVC" - this.intervalsString ++= List(t.intervals) - this.qual = Some(350) // clustering parameters to be updated soon pending new experimentation results - this.std = Some(3.5) - this.mG = Some(10) - this.ignoreFilter ++= FiltersToIgnore - if (t.dbsnpFile.endsWith(".rod")) - this.DBSNP = new File(t.dbsnpFile) - else if (t.dbsnpFile.endsWith(".vcf")) - this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile) + class VQSR(t: Target, goldStandard: Boolean) extends ContrastiveRecalibrator { + this.memoryLimit = Some(6) + this.intervalsString ++= List(t.intervals) + this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } ) + this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile) + if( t.hapmapFile.contains("b37") ) + this.rodBind :+= RodBind("1kg", "VCF", omni_b37) + else if( t.hapmapFile.contains("b36") ) + this.rodBind :+= RodBind("1kg", "VCF", omni_b36) + if (t.dbsnpFile.endsWith(".rod")) + this.DBSNP = new File(t.dbsnpFile) + else if (t.dbsnpFile.endsWith(".vcf")) + this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile) + this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun") + this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile } + this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile } + this.allPoly = true + this.tranche ++= List("0.1", "0.5", "0.7", "1.0", "1.1", "1.2", "1.5", "1.6", "1.7", "1.8", "1.9", "2.0", "2.1", "2.2", "2.5","3.0", "5.0", "10.0") } - // 4.) VQSR part2 Calculate new LOD for all input SNPs by evaluating the Gaussian clusters - class VariantRecalibratorBase(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.VariantRecalibrator { - this.jarFile = gatkJarFile - val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name } - this.reference_sequence = t.reference - if( t.hapmapFile.contains("b37") ) - this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf") - this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile) - this.rodBind :+= RodBind("truth", "VCF", t.hapmapFile) - this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } ) - this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile } - this.analysisName = name + "_VR" - this.intervalsString ++= List(t.intervals) - this.ignoreFilter ++= FiltersToIgnore - this.ignoreFilter ++= List("HARD_TO_VALIDATE") - this.target_titv = Some(t.titvTarget) - if (t.dbsnpFile.endsWith(".rod")) - this.DBSNP = new File(t.dbsnpFile) - else if (t.dbsnpFile.endsWith(".vcf")) - this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile) - } - - // 4a.) Choose VQSR tranches based on novel ti/tv - class VariantRecalibratorTiTv(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) { - this.jarFile = gatkJarFile - this.tranche ++= List("0.1", "1.0", "10.0", "100.0") - this.out = t.titvRecalibratedVCF - this.tranchesFile = t.titvTranchesFile - } - - // 4b.) Choose VQSR tranches based on sensitivity to truth set - class VariantRecalibratorNRS(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) { - this.jarFile = gatkJarFile - this.sm = Some(org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator.SelectionMetricType.TRUTH_SENSITIVITY) - this.tranche ++= List("0.1", "1.0", "10.0", "100.0") - this.out = t.tsRecalibratedVCF - this.priorDBSNP = Some(2.0) - this.priorHapMap = Some(2.0) - this.prior1KG = Some(2.0) - this.tranchesFile = t.tsTranchesFile + class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration { + this.memoryLimit = Some(4) + this.intervalsString ++= List(t.intervals) + this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } ) + this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile} + this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile } + this.fdr_filter_level = Some(2.0) + this.out = t.recalibratedVCF } // 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche class VariantCut(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ApplyVariantCuts { this.jarFile = gatkJarFile this.reference_sequence = t.reference - this.rodBind :+= RodBind("input", "VCF", t.tsRecalibratedVCF ) + this.rodBind :+= RodBind("input", "VCF", t.recalibratedVCF ) this.analysisName = t.name + "_VC" this.intervalsString ++= List(t.intervals) this.out = t.cutVCF - this.tranchesFile = t.tsTranchesFile + this.tranchesFile = t.tranchesFile this.fdr_filter_level = Some(1.0) if (t.dbsnpFile.endsWith(".rod")) this.DBSNP = new File(t.dbsnpFile)