pacbio calling pipeline also using VQSR2 now, minor updates on the other pipelines to get the papuans through.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5479 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4e449905d1
commit
96628457cb
|
|
@ -207,8 +207,8 @@ class dataProcessing extends QScript {
|
|||
this.jobName = queueLogDir + outBamList + ".bamList"
|
||||
}
|
||||
|
||||
class joinBams(inBams: File, outBam: String) extends PrintReads {
|
||||
this.input_file :+= inBams
|
||||
class joinBams(inBams: String, outBam: String) extends PrintReads {
|
||||
this.input_file :+= new File(inBams)
|
||||
this.out = new File(outBam)
|
||||
this.jobName = queueLogDir + inBams + ".joinBams"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
|
||||
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||
import org.broadinstitute.sting.queue.extensions.gatk.BamGatherFunction
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -21,7 +23,6 @@ class justClean extends QScript {
|
|||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
|
|
@ -34,8 +35,10 @@ class justClean extends QScript {
|
|||
|
||||
def script = {
|
||||
|
||||
println(GATKjar)
|
||||
|
||||
val outBam = swapExt(input, ".bam", ".Qclean.bam")
|
||||
val tIntervals = swapExt(input, ".bam", ".Qall_indels.intervals")
|
||||
val tIntervals = swapExt(input, ".bam", ".all_indels.intervals")
|
||||
|
||||
val target = new RealignerTargetCreator()
|
||||
target.input_file :+= input
|
||||
|
|
@ -49,6 +52,8 @@ class justClean extends QScript {
|
|||
target.jarFile = GATKjar
|
||||
target.scatterCount = 84
|
||||
|
||||
|
||||
|
||||
val clean = new IndelRealigner()
|
||||
clean.input_file :+= input
|
||||
clean.targetIntervals = tIntervals
|
||||
|
|
@ -64,6 +69,13 @@ class justClean extends QScript {
|
|||
clean.memoryLimit = Some(6)
|
||||
clean.scatterCount = 84
|
||||
|
||||
add(target, clean);
|
||||
clean.setupGatherFunction = {
|
||||
case (gather: BamGatherFunction, source: ArgumentSource) =>
|
||||
gather.memoryLimit = Some(6) // Memory limit you expect for the job
|
||||
gather.jarFile = new File("/seq/software/picard/current/bin/MergeSamFiles.jar")
|
||||
}
|
||||
|
||||
|
||||
add(clean);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import org.broadinstitute.sting.gatk.CommandLineGATK
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
|
||||
|
|
@ -34,8 +33,8 @@ class pbCalling extends QScript {
|
|||
val filteredVCF = new File(name + ".filtered.vcf")
|
||||
val titvRecalibratedVCF = new File(name + ".titv.recalibrated.vcf")
|
||||
val titvTranchesFile = new File(name + ".titv.tranches")
|
||||
val tsRecalibratedVCF = new File(name + ".ts.recalibrated.vcf")
|
||||
val tsTranchesFile = new File(name + ".ts.tranches")
|
||||
val recalibratedVCF = new File(name + ".ts.recalibrated.vcf")
|
||||
val tranchesFile = new File(name + ".ts.tranches")
|
||||
val cutVCF = new File(name + ".cut.vcf")
|
||||
val evalFile = new File(name + ".eval")
|
||||
val goldStandardName = qscript.outputDir + "goldStandard/" + baseName
|
||||
|
|
@ -142,9 +141,8 @@ class pbCalling extends QScript {
|
|||
for (target <- targets) {
|
||||
add(new UnifiedGenotyper(target))
|
||||
add(new VariantFiltration(target))
|
||||
add(new GenerateVariantClusters(target, !goldStandard))
|
||||
// add(new VariantRecalibratorTiTv(target, !goldStandard))
|
||||
add(new VariantRecalibratorNRS(target, !goldStandard))
|
||||
add(new VQSR(target, !goldStandard))
|
||||
add(new applyVQSR(target, !goldStandard))
|
||||
add(new VariantCut(target))
|
||||
add(new VariantEvaluation(target))
|
||||
}
|
||||
|
|
@ -189,83 +187,45 @@ class pbCalling extends QScript {
|
|||
this.analysisName = t.name + "_VF"
|
||||
}
|
||||
|
||||
// 3.) VQSR part1 Generate Gaussian clusters based on truth sites
|
||||
class GenerateVariantClusters(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.GenerateVariantClusters {
|
||||
this.jarFile = gatkJarFile
|
||||
val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name }
|
||||
this.reference_sequence = t.reference
|
||||
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
|
||||
if( t.hapmapFile.contains("b37") )
|
||||
this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf")
|
||||
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
|
||||
this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile }
|
||||
if (t.isCCS)
|
||||
this.use_annotation ++= List("QD", "HaplotypeScore", "HRun")
|
||||
else
|
||||
this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
|
||||
this.analysisName = name + "_GVC"
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.qual = Some(350) // clustering parameters to be updated soon pending new experimentation results
|
||||
this.std = Some(3.5)
|
||||
this.mG = Some(10)
|
||||
this.ignoreFilter ++= FiltersToIgnore
|
||||
if (t.dbsnpFile.endsWith(".rod"))
|
||||
this.DBSNP = new File(t.dbsnpFile)
|
||||
else if (t.dbsnpFile.endsWith(".vcf"))
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
|
||||
class VQSR(t: Target, goldStandard: Boolean) extends ContrastiveRecalibrator {
|
||||
this.memoryLimit = Some(6)
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
|
||||
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
|
||||
if( t.hapmapFile.contains("b37") )
|
||||
this.rodBind :+= RodBind("1kg", "VCF", omni_b37)
|
||||
else if( t.hapmapFile.contains("b36") )
|
||||
this.rodBind :+= RodBind("1kg", "VCF", omni_b36)
|
||||
if (t.dbsnpFile.endsWith(".rod"))
|
||||
this.DBSNP = new File(t.dbsnpFile)
|
||||
else if (t.dbsnpFile.endsWith(".vcf"))
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
|
||||
this.use_annotation ++= List("QD", "SB", "HaplotypeScore", "HRun")
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile }
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
||||
this.allPoly = true
|
||||
this.tranche ++= List("0.1", "0.5", "0.7", "1.0", "1.1", "1.2", "1.5", "1.6", "1.7", "1.8", "1.9", "2.0", "2.1", "2.2", "2.5","3.0", "5.0", "10.0")
|
||||
}
|
||||
|
||||
// 4.) VQSR part2 Calculate new LOD for all input SNPs by evaluating the Gaussian clusters
|
||||
class VariantRecalibratorBase(t: Target, goldStandard: Boolean) extends org.broadinstitute.sting.queue.extensions.gatk.VariantRecalibrator {
|
||||
this.jarFile = gatkJarFile
|
||||
val name: String = if ( goldStandard ) { t.goldStandardName } else { t.name }
|
||||
this.reference_sequence = t.reference
|
||||
if( t.hapmapFile.contains("b37") )
|
||||
this.rodBind :+= RodBind("1kg", "VCF", "/humgen/gsa-hpprojects/GATK/data/Comparisons/Unvalidated/1kg_pilot1_projectCalls/ALL.low_coverage.2010_07.hg19.vcf")
|
||||
this.rodBind :+= RodBind("hapmap", "VCF", t.hapmapFile)
|
||||
this.rodBind :+= RodBind("truth", "VCF", t.hapmapFile)
|
||||
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
|
||||
this.clusterFile = if ( goldStandard ) { t.goldStandardClusterFile } else { t.clusterFile }
|
||||
this.analysisName = name + "_VR"
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.ignoreFilter ++= FiltersToIgnore
|
||||
this.ignoreFilter ++= List("HARD_TO_VALIDATE")
|
||||
this.target_titv = Some(t.titvTarget)
|
||||
if (t.dbsnpFile.endsWith(".rod"))
|
||||
this.DBSNP = new File(t.dbsnpFile)
|
||||
else if (t.dbsnpFile.endsWith(".vcf"))
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", t.dbsnpFile)
|
||||
}
|
||||
|
||||
// 4a.) Choose VQSR tranches based on novel ti/tv
|
||||
class VariantRecalibratorTiTv(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) {
|
||||
this.jarFile = gatkJarFile
|
||||
this.tranche ++= List("0.1", "1.0", "10.0", "100.0")
|
||||
this.out = t.titvRecalibratedVCF
|
||||
this.tranchesFile = t.titvTranchesFile
|
||||
}
|
||||
|
||||
// 4b.) Choose VQSR tranches based on sensitivity to truth set
|
||||
class VariantRecalibratorNRS(t: Target, goldStandard: Boolean) extends VariantRecalibratorBase(t, goldStandard) {
|
||||
this.jarFile = gatkJarFile
|
||||
this.sm = Some(org.broadinstitute.sting.gatk.walkers.variantrecalibration.VariantRecalibrator.SelectionMetricType.TRUTH_SENSITIVITY)
|
||||
this.tranche ++= List("0.1", "1.0", "10.0", "100.0")
|
||||
this.out = t.tsRecalibratedVCF
|
||||
this.priorDBSNP = Some(2.0)
|
||||
this.priorHapMap = Some(2.0)
|
||||
this.prior1KG = Some(2.0)
|
||||
this.tranchesFile = t.tsTranchesFile
|
||||
class applyVQSR (t: Target, goldStandard: Boolean) extends ApplyRecalibration {
|
||||
this.memoryLimit = Some(4)
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.rodBind :+= RodBind("input", "VCF", if ( goldStandard ) { t.goldStandard_VCF } else { t.filteredVCF } )
|
||||
this.tranches_file = if ( goldStandard ) { t.goldStandardTranchesFile } else { t.tranchesFile}
|
||||
this.recal_file = if ( goldStandard ) { t.goldStandardRecalFile } else { t.recalFile }
|
||||
this.fdr_filter_level = Some(2.0)
|
||||
this.out = t.recalibratedVCF
|
||||
}
|
||||
|
||||
// 5.) Variant Cut filter out the variants marked by recalibration to the 99% tranche
|
||||
class VariantCut(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ApplyVariantCuts {
|
||||
this.jarFile = gatkJarFile
|
||||
this.reference_sequence = t.reference
|
||||
this.rodBind :+= RodBind("input", "VCF", t.tsRecalibratedVCF )
|
||||
this.rodBind :+= RodBind("input", "VCF", t.recalibratedVCF )
|
||||
this.analysisName = t.name + "_VC"
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.out = t.cutVCF
|
||||
this.tranchesFile = t.tsTranchesFile
|
||||
this.tranchesFile = t.tranchesFile
|
||||
this.fdr_filter_level = Some(1.0)
|
||||
if (t.dbsnpFile.endsWith(".rod"))
|
||||
this.DBSNP = new File(t.dbsnpFile)
|
||||
|
|
|
|||
Loading…
Reference in New Issue