Added option to exclude intervals during chunk calling.

Removed job priority as temp space isn't as tight at the moment and planning on changing the priority interface.
Updated chunk calling with ebanks:
- Using "the bundle" of resources.
- Using dbsnp 132 and 1000G indel RODs for both RTC & IR.
- Using the default maxIntervalSize in RTC.
- Removed use of UG.exactCalculation argument.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5814 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2011-05-18 03:48:02 +00:00
parent d698c87bbf
commit 83e207d9dd
1 changed files with 19 additions and 12 deletions

View File

@ -31,6 +31,9 @@ class WholeGenomePipeline extends QScript {
@Input(doc="Bam file list", shortName = "I", required=true) @Input(doc="Bam file list", shortName = "I", required=true)
var bamList: File = _ var bamList: File = _
@Input(doc="Exclude intervals list", shortName = "XL", required=false)
var excludeIntervals: List[File] = Nil
@Argument(doc="path to tmp space for storing intermediate bam files", shortName="cleanerTmpDir", required=true) @Argument(doc="path to tmp space for storing intermediate bam files", shortName="cleanerTmpDir", required=true)
var cleanerTmpDir: String = _ var cleanerTmpDir: String = _
@ -40,10 +43,12 @@ class WholeGenomePipeline extends QScript {
@Argument(doc="Chunk size. Defaults to 3,000,000", shortName="chunk", required=false) @Argument(doc="Chunk size. Defaults to 3,000,000", shortName="chunk", required=false)
var chunkSize = 3000000 var chunkSize = 3000000
val reference = "/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta" val resources = "/humgen/gsa-pipeline/resources/5777/b37/"
val dbsnp = "/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.vcf" val reference = resources + "human_g1k_v37.fasta"
val hapmap = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/HapMap/3.3/sites_r27_nr.b37_fwd.vcf" val dbsnp = resources + "dbsnp_132.b37.vcf"
val omni = "/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/Omni2.5_chip/Omni25_sites_1525_samples.b37.vcf" val indels = resources + "1000G_indels_for_realignment.b37.vcf"
val omni = resources + "1000G_omni2.5.b37.sites.vcf"
val hapmap = resources + "hapmap_3.3.b37.sites.vcf"
trait CommandLineGATKArgs extends CommandLineGATK { trait CommandLineGATKArgs extends CommandLineGATK {
this.reference_sequence = reference this.reference_sequence = reference
@ -102,46 +107,48 @@ class WholeGenomePipeline extends QScript {
val chunkBase: String = "chunks/" + project + "." + runType + "_chunk_" + chr + "_" + chunkNumber val chunkBase: String = "chunks/" + project + "." + runType + "_chunk_" + chr + "_" + chunkNumber
val tmpBase: String = cleanerTmpDir + "/" + chunkBase val tmpBase: String = cleanerTmpDir + "/" + chunkBase
val chunkInterval = "%s:%d-%d".format(chr, start, stop) val chunkInterval = List("%s:%d-%d".format(chr, start, stop))
val target = new RealignerTargetCreator with CommandLineGATKArgs val target = new RealignerTargetCreator with CommandLineGATKArgs
target.intervalsString = List(chunkInterval)
target.input_file :+= bamList target.input_file :+= bamList
target.intervalsString = chunkInterval
target.excludeIntervals = excludeIntervals
target.mismatchFraction = 0.0 target.mismatchFraction = 0.0
target.maxIntervalSize = 700 target.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
target.rodBind :+= RodBind("indels", "VCF", indels)
target.out = tmpBase + ".target.intervals" target.out = tmpBase + ".target.intervals"
target.jobOutputFile = chunkBase + ".target.intervals.out" target.jobOutputFile = chunkBase + ".target.intervals.out"
target.isIntermediate = true target.isIntermediate = true
add(target) add(target)
val clean = new IndelRealigner with CommandLineGATKArgs val clean = new IndelRealigner with CommandLineGATKArgs
clean.intervalsString = List(chunkInterval)
clean.input_file :+= bamList clean.input_file :+= bamList
clean.intervalsString = chunkInterval
clean.excludeIntervals = excludeIntervals
clean.targetIntervals = target.out clean.targetIntervals = target.out
clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp) clean.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
clean.rodBind :+= RodBind("indels", "VCF", indels)
clean.doNotUseSW = true clean.doNotUseSW = true
clean.simplifyBAM = true clean.simplifyBAM = true
clean.bam_compression = 1 clean.bam_compression = 1
clean.out = tmpBase + ".cleaned.bam" clean.out = tmpBase + ".cleaned.bam"
clean.jobOutputFile = chunkBase + ".cleaned.bam.out" clean.jobOutputFile = chunkBase + ".cleaned.bam.out"
clean.jobPriority = 51
clean.isIntermediate = true clean.isIntermediate = true
add(clean) add(clean)
val call = new UnifiedGenotyper with CommandLineGATKArgs val call = new UnifiedGenotyper with CommandLineGATKArgs
call.intervalsString = List(chunkInterval)
call.input_file :+= clean.out call.input_file :+= clean.out
call.intervalsString = chunkInterval
call.excludeIntervals = excludeIntervals
call.rodBind :+= RodBind("dbsnp", "VCF", dbsnp) call.rodBind :+= RodBind("dbsnp", "VCF", dbsnp)
call.downsample_to_coverage = 50 call.downsample_to_coverage = 50
call.standard_min_confidence_threshold_for_calling = 4.0 call.standard_min_confidence_threshold_for_calling = 4.0
call.standard_min_confidence_threshold_for_emitting = 4.0 call.standard_min_confidence_threshold_for_emitting = 4.0
call.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY call.baq = org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY
call.exactCalculation = org.broadinstitute.sting.gatk.walkers.genotyper.ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL
call.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH call.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.BOTH
call.GSA_PRODUCTION_ONLY = true call.GSA_PRODUCTION_ONLY = true
call.out = chunkBase + ".vcf" call.out = chunkBase + ".vcf"
call.jobOutputFile = call.out + ".out" call.jobOutputFile = call.out + ".out"
call.jobPriority = 52
add(call) add(call)
chunkVcfs :+= call.out chunkVcfs :+= call.out