The full calling pipeline now calls indels through the indel genotyper (one sample at a time), merges the per-sample calls with CombineVariants, and uses the merged indels as the mask when filtering SNPs. Since the new genomic annotator is fast, it is no longer scatter-gathered.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4144 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
78946c4ffd
commit
7908237b90
|
|
@ -45,6 +45,9 @@ class fullCallingPipeline extends QScript {
|
||||||
@Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false)
|
@Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false)
|
||||||
var snpClusterWindow = 7
|
var snpClusterWindow = 7
|
||||||
|
|
||||||
|
@Input(doc="dbSNP version",shortName="D")
|
||||||
|
var dbSNP: File = _
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||||
|
|
@ -110,7 +113,6 @@ class fullCallingPipeline extends QScript {
|
||||||
snps.standard_min_confidence_threshold_for_emitting = Some(10)
|
snps.standard_min_confidence_threshold_for_emitting = Some(10)
|
||||||
snps.min_mapping_quality_score = Some(20)
|
snps.min_mapping_quality_score = Some(20)
|
||||||
snps.min_base_quality_score = Some(20)
|
snps.min_base_quality_score = Some(20)
|
||||||
snps.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
|
|
||||||
snps.downsample_to_coverage = Some(200)
|
snps.downsample_to_coverage = Some(200)
|
||||||
snps.annotation :+= "QualByDepthV2"
|
snps.annotation :+= "QualByDepthV2"
|
||||||
|
|
||||||
|
|
@ -129,16 +131,32 @@ class fullCallingPipeline extends QScript {
|
||||||
snps.scatterCount = 50
|
snps.scatterCount = 50
|
||||||
|
|
||||||
|
|
||||||
val indels = new UnifiedGenotyper with CommandLineGATKArgs
|
// indel genotyper does one sample at a time
|
||||||
indels.input_file = bamFiles
|
val indelCallFiles = List.empty[RodBind]
|
||||||
indels.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
|
val loopNo = 0
|
||||||
indels.downsample_to_coverage = Some(200)
|
val priority = ""
|
||||||
indels.variants_out = base+".indels.vcf"
|
for ( bam <- bamFiles ) {
|
||||||
indels.genotype_model = Some(Model.INDELS)
|
val indel = new IndelGenotyperV2 with CommandLineGATKArgs
|
||||||
indels.scatterCount = 50
|
indel.input_file :+= bam
|
||||||
|
indel.out = swapExt(bam,".bam",".indels.vcf")
|
||||||
|
indel.downsample_to_coverage = Some(500)
|
||||||
|
indelCallFiles :+= new RodBind("v"+loopNo.toString, "VCF", indel.out)
|
||||||
|
if ( loopNo == 0 ) {
|
||||||
|
priority = "v0"
|
||||||
|
} else {
|
||||||
|
priority += ",v"+loopNo.toString
|
||||||
|
}
|
||||||
|
loopNo += 1
|
||||||
|
}
|
||||||
|
val mergeIndels = new CombineVariants with CommandLineGATKArgs
|
||||||
|
mergeIndels.out = qscript.project+".indels.vcf"
|
||||||
|
mergeIndels.genotypemergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE
|
||||||
|
mergeIndels.priority = priority
|
||||||
|
mergeIndels.variantmergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION
|
||||||
|
mergeIndels.rodBind = indelCallFiles
|
||||||
|
|
||||||
|
|
||||||
// 1b. genomically annotate SNPs -- slow, but scatter it
|
// 1b. genomically annotate SNPs -- no longer slow
|
||||||
val annotated = new GenomicAnnotator with CommandLineGATKArgs
|
val annotated = new GenomicAnnotator with CommandLineGATKArgs
|
||||||
annotated.rodBind :+= RodBind("variant", "VCF", new File(snps.variants_out))
|
annotated.rodBind :+= RodBind("variant", "VCF", new File(snps.variants_out))
|
||||||
annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
|
annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
|
||||||
|
|
@ -146,13 +164,12 @@ class fullCallingPipeline extends QScript {
|
||||||
annotated.vcfOutput = swapExt(new File(snps.variants_out),".vcf",".annotated.vcf").getAbsolutePath
|
annotated.vcfOutput = swapExt(new File(snps.variants_out),".vcf",".annotated.vcf").getAbsolutePath
|
||||||
annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
|
annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
|
||||||
annotated.rodToIntervalTrackName = "variant"
|
annotated.rodToIntervalTrackName = "variant"
|
||||||
annotated.scatterCount = 100
|
|
||||||
|
|
||||||
|
|
||||||
// 2.a filter on cluster and near indels
|
// 2.a filter on cluster and near indels
|
||||||
val masker = new VariantFiltration with CommandLineGATKArgs
|
val masker = new VariantFiltration with CommandLineGATKArgs
|
||||||
masker.rodBind :+= RodBind("variant", "VCF", new File(annotated.vcfOutput))
|
masker.rodBind :+= RodBind("variant", "VCF", new File(annotated.vcfOutput))
|
||||||
masker.rodBind :+= RodBind("mask", "VCF", new File(indels.variants_out))
|
masker.rodBind :+= RodBind("mask", "VCF", new File(mergeIndels.out))
|
||||||
masker.maskName = "NearIndel"
|
masker.maskName = "NearIndel"
|
||||||
masker.clusterWindowSize = Some(qscript.snpClusterWindow)
|
masker.clusterWindowSize = Some(qscript.snpClusterWindow)
|
||||||
masker.clusterSize = Some(qscript.snpsInCluster)
|
masker.clusterSize = Some(qscript.snpsInCluster)
|
||||||
|
|
@ -173,7 +190,8 @@ class fullCallingPipeline extends QScript {
|
||||||
// todo -- args for resources (properties file)
|
// todo -- args for resources (properties file)
|
||||||
val clusters = new GenerateVariantClusters with CommandLineGATKArgs
|
val clusters = new GenerateVariantClusters with CommandLineGATKArgs
|
||||||
clusters.rodBind :+= RodBind("input", "VCF", masker.out)
|
clusters.rodBind :+= RodBind("input", "VCF", masker.out)
|
||||||
val clusters_clusterFile = swapExt(new File(snps.variants_out),".vcf",".cluster").getAbsolutePath
|
val clusters_clusterFile = swapExt(new File(snps.variants_out),".vcf",".cluster")
|
||||||
|
clusters.clusterFile = clusters_clusterFile
|
||||||
clusters.memoryLimit = Some(8)
|
clusters.memoryLimit = Some(8)
|
||||||
clusters.jobQueue = "hugemem"
|
clusters.jobQueue = "hugemem"
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue