pacbio calling pipeline also using VQSR2 now, minor updates on the other pipelines to get the papuans through.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5479 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-03-18 22:06:52 +00:00
parent 4e449905d1
commit 96628457cb
3 changed files with 50 additions and 78 deletions

View File

@ -207,8 +207,8 @@ class dataProcessing extends QScript {
this.jobName = queueLogDir + outBamList + ".bamList"
}
class joinBams(inBams: File, outBam: String) extends PrintReads {
this.input_file :+= inBams
class joinBams(inBams: String, outBam: String) extends PrintReads {
this.input_file :+= new File(inBams)
this.out = new File(outBam)
this.jobName = queueLogDir + inBams + ".joinBams"
}

View File

@ -1,5 +1,7 @@
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
import org.broadinstitute.sting.commandline.ArgumentSource
import org.broadinstitute.sting.queue.extensions.gatk.BamGatherFunction
/**
* Created by IntelliJ IDEA.
@ -21,7 +23,6 @@ class justClean extends QScript {
@Input(doc="Reference fasta file", shortName="R", required=false)
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
@ -34,8 +35,10 @@ class justClean extends QScript {
def script = {
println(GATKjar)
val outBam = swapExt(input, ".bam", ".Qclean.bam")
val tIntervals = swapExt(input, ".bam", ".Qall_indels.intervals")
val tIntervals = swapExt(input, ".bam", ".all_indels.intervals")
val target = new RealignerTargetCreator()
target.input_file :+= input
@ -49,6 +52,8 @@ class justClean extends QScript {
target.jarFile = GATKjar
target.scatterCount = 84
val clean = new IndelRealigner()
clean.input_file :+= input
clean.targetIntervals = tIntervals
@ -64,6 +69,13 @@ class justClean extends QScript {
clean.memoryLimit = Some(6)
clean.scatterCount = 84
add(target, clean);
clean.setupGatherFunction = {
case (gather: BamGatherFunction, source: ArgumentSource) =>
gather.memoryLimit = Some(6) // Memory limit you expect for the job
gather.jarFile = new File("/seq/software/picard/current/bin/MergeSamFiles.jar")
}
add(clean);
}
}

View File

@ -1,4 +1,3 @@
import org.broadinstitute.sting.gatk.CommandLineGATK
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript
@ -34,8 +33,8 @@ class pbCalling extends QScript {
val filteredVCF = new File(name + ".filtered.vcf")
val titvRecalibratedVCF = new File(name + ".titv.recalibrated.vcf")
val titvTranchesFile = new File(name + ".titv.tranches")
val tsRecalibratedVCF = new File(name + ".ts.recalibrated.vcf")
val tsTranchesFile = new File(name + ".ts.tranches")
val recalibratedVCF = new File(name + ".ts.recalibrated.vcf")
val tranchesFile = new File(name + ".ts.tranches")
val cutVCF = new File(name + ".cut.vcf")
val evalFile = new File(name + ".eval")
val goldStandardName = qscript.outputDir + "goldStandard/" + baseName
@ -142,9 +141,8 @@ class pbCalling extends QScript {
for (target <- targets) {
add(new UnifiedGenotyper(target))
add(new VariantFiltration(target))
add(new GenerateVariantClusters(target, !goldStandard))
// add(new VariantRecalibratorTiTv(target, !goldStandard))
add(new VariantRecalibratorNRS(target, !goldStandard))
add(new VQSR(target, !goldStandard))
add(new applyVQSR(target, !goldStandard))
add(new VariantCut(target))
add(new VariantEvaluation(target))
}
@ -189,83 +187,45 @@ class pbCalling extends QScript {
this.analysisName = t.name + "_VF"
}
// 3.) VQSR part1 Generate Gaussian clusters based on truth sites
class GenerateVariantClusters(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.GenerateVariantClusters {
this.jarFile = gatkJarFile
val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name }
this.reference_sequence = t.reference
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
if( t.hapmapFile.contains("b37") )
this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf")
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile }
if (t.isCCS)
this.use_annotation ++= List("QD", "HaplotypeScore", "HRun")
else
this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
this.analysisName = name + "_GVC"
this.intervalsString ++= List(t.intervals)
this.qual = Some(350) // clustering parameters to be updated soon pending new experimentation results
this.std = Some(3.5)
this.mG = Some(10)
this.ignoreFilter ++= FiltersToIgnore
if (t.dbsnpFile.endsWith(".rod"))
this.DBSNP = new File(t.dbsnpFile)
else if (t.dbsnpFile.endsWith(".vcf"))
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
class VQSR(t: Target, goldStandard: Boolean) extends ContrastiveRecalibrator {
this.memoryLimit = Some(6)
this.intervalsString ++= List(t.intervals)
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
if( t.hapmapFile.contains("b37") )
this.rodBind :+= RodBind("1kg", "VCF", omni_b37)
else if( t.hapmapFile.contains("b36") )
this.rodBind :+= RodBind("1kg", "VCF", omni_b36)
if (t.dbsnpFile.endsWith(".rod"))
this.DBSNP = new File(t.dbsnpFile)
else if (t.dbsnpFile.endsWith(".vcf"))
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
this.allPoly = true
this.tranche ++= List("0.1", "0.5", "0.7", "1.0", "1.1", "1.2", "1.5", "1.6", "1.7", "1.8", "1.9", "2.0", "2.1", "2.2", "2.5","3.0", "5.0", "10.0")
}
// 4.) VQSR part2 Calculate new LOD for all input SNPs by evaluating the Gaussian clusters
class VariantRecalibratorBase(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.VariantRecalibrator {
this.jarFile = gatkJarFile
val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name }
this.reference_sequence = t.reference
if( t.hapmapFile.contains("b37") )
this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf")
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
this.rodBind :+= RodBind("truth", "VCF", t.hapmapFile)
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile }
this.analysisName = name + "_VR"
this.intervalsString ++= List(t.intervals)
this.ignoreFilter ++= FiltersToIgnore
this.ignoreFilter ++= List("HARD_TO_VALIDATE")
this.target_titv = Some(t.titvTarget)
if (t.dbsnpFile.endsWith(".rod"))
this.DBSNP = new File(t.dbsnpFile)
else if (t.dbsnpFile.endsWith(".vcf"))
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
}
// 4a.) Choose VQSR tranches based on novel ti/tv
class VariantRecalibratorTiTv(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) {
this.jarFile = gatkJarFile
this.tranche ++= List("0.1", "1.0", "10.0", "100.0")
this.out = t.titvRecalibratedVCF
this.tranchesFile = t.titvTranchesFile
}
// 4b.) Choose VQSR tranches based on sensitivity to truth set
class VariantRecalibratorNRS(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) {
this.jarFile = gatkJarFile
this.sm = Some(org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator.SelectionMetricType.TRUTH_SENSITIVITY)
this.tranche ++= List("0.1", "1.0", "10.0", "100.0")
this.out = t.tsRecalibratedVCF
this.priorDBSNP = Some(2.0)
this.priorHapMap = Some(2.0)
this.prior1KG = Some(2.0)
this.tranchesFile = t.tsTranchesFile
class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration {
this.memoryLimit = Some(4)
this.intervalsString ++= List(t.intervals)
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile}
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
this.fdr_filter_level = Some(2.0)
this.out = t.recalibratedVCF
}
// 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche
class VariantCut(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ApplyVariantCuts {
this.jarFile = gatkJarFile
this.reference_sequence = t.reference
this.rodBind :+= RodBind("input", "VCF", t.tsRecalibratedVCF )
this.rodBind :+= RodBind("input", "VCF", t.recalibratedVCF )
this.analysisName = t.name + "_VC"
this.intervalsString ++= List(t.intervals)
this.out = t.cutVCF
this.tranchesFile = t.tsTranchesFile
this.tranchesFile = t.tranchesFile
this.fdr_filter_level = Some(1.0)
if (t.dbsnpFile.endsWith(".rod"))
this.DBSNP = new File(t.dbsnpFile)