Added option to exclude intervals during chunk calling.
Removed job priority as temp space isn't as tight at the moment and planning on changing the priority interface. Updated chunk calling with ebanks: - Using "the bundle" of resources. - Using dbsnp 132 and 1000G indel RODs for both RTC & IR. - Using the default maxIntervalSize in RTC. - Removed use of UG.exactCalculation argument. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5814 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d698c87bbf
commit
83e207d9dd
|
|
@ -31,6 +31,9 @@ class WholeGenomePipeline extends QScript {
|
|||
@Input(doc="Bam file list", shortName = "I", required=true)
|
||||
var bamList: File = _
|
||||
|
||||
@Input(doc="Exclude intervals list", shortName = "XL", required=false)
|
||||
var excludeIntervals: List[File] = Nil
|
||||
|
||||
@Argument(doc="path to tmp space for storing intermediate bam files", shortName="cleanerTmpDir", required=true)
|
||||
var cleanerTmpDir: String = _
|
||||
|
||||
|
|
@ -40,10 +43,12 @@ class WholeGenomePipeline extends QScript {
|
|||
@Argument(doc="Chunk size. Defaults to 3,000,000", shortName="chunk", required=false)
|
||||
var chunkSize = 3000000
|
||||
|
||||
val reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta"
|
||||
val dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.vcf"
|
||||
val hapmap = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf"
|
||||
val omni = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf"
|
||||
val resources = "/humgen/gsa-pipeline/resources/5777/b37/"
|
||||
val reference = resources + "human_g1k_v37.fasta"
|
||||
val dbsnp = resources + "dbsnp_132.b37.vcf"
|
||||
val indels = resources + "1000G_indels_for_realignment.b37.vcf"
|
||||
val omni = resources + "1000G_omni2.5.b37.sites.vcf"
|
||||
val hapmap = resources + "hapmap_3.3.b37.sites.vcf"
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.reference_sequence = reference
|
||||
|
|
@ -102,46 +107,48 @@ class WholeGenomePipeline extends QScript {
|
|||
val chunkBase: String = "chunks/" + project + "." + runType + "_chunk_" + chr + "_" + chunkNumber
|
||||
val tmpBase: String = cleanerTmpDir + "/" + chunkBase
|
||||
|
||||
val chunkInterval = "%s:%d-%d".format(chr, start, stop)
|
||||
val chunkInterval = List("%s:%d-%d".format(chr, start, stop))
|
||||
|
||||
val target = new RealignerTargetCreator with CommandLineGATKArgs
|
||||
target.intervalsString = List(chunkInterval)
|
||||
target.input_file :+= bamList
|
||||
target.intervalsString = chunkInterval
|
||||
target.excludeIntervals = excludeIntervals
|
||||
target.mismatchFraction = 0.0
|
||||
target.maxIntervalSize = 700
|
||||
target.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
|
||||
target.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
target.out = tmpBase + ".target.intervals"
|
||||
target.jobOutputFile = chunkBase + ".target.intervals.out"
|
||||
target.isIntermediate = true
|
||||
add(target)
|
||||
|
||||
val clean = new IndelRealigner with CommandLineGATKArgs
|
||||
clean.intervalsString = List(chunkInterval)
|
||||
clean.input_file :+= bamList
|
||||
clean.intervalsString = chunkInterval
|
||||
clean.excludeIntervals = excludeIntervals
|
||||
clean.targetIntervals = target.out
|
||||
clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
|
||||
clean.rodBind :+= RodBind("indels", "VCF", indels)
|
||||
clean.doNotUseSW = true
|
||||
clean.simplifyBAM = true
|
||||
clean.bam_compression = 1
|
||||
clean.out = tmpBase + ".cleaned.bam"
|
||||
clean.jobOutputFile = chunkBase + ".cleaned.bam.out"
|
||||
clean.jobPriority = 51
|
||||
clean.isIntermediate = true
|
||||
add(clean)
|
||||
|
||||
val call = new UnifiedGenotyper with CommandLineGATKArgs
|
||||
call.intervalsString = List(chunkInterval)
|
||||
call.input_file :+= clean.out
|
||||
call.intervalsString = chunkInterval
|
||||
call.excludeIntervals = excludeIntervals
|
||||
call.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
|
||||
call.downsample_to_coverage = 50
|
||||
call.standard_min_confidence_threshold_for_calling = 4.0
|
||||
call.standard_min_confidence_threshold_for_emitting = 4.0
|
||||
call.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
|
||||
call.exactCalculation = org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL
|
||||
call.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
|
||||
call.GSA_PRODUCTION_ONLY = true
|
||||
call.out = chunkBase + ".vcf"
|
||||
call.jobOutputFile = call.out + ".out"
|
||||
call.jobPriority = 52
|
||||
add(call)
|
||||
|
||||
chunkVcfs :+= call.out
|
||||
|
|
|
|||
Loading…
Reference in New Issue