Full calling pipeline now calls indels through the indel genotyper, merges with combine variants, and filters on them. Since new genomic annotator is fast, it is no longer scatter-gathered.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4144 348d0f76-0448-11de-a6fe-93d51630548a
2010-08-27 16:28:24 +00:00 · 2010-08-27 16:28:24 +00:00 · 7908237b90
parent 78946c4ffd
commit 7908237b90
1 changed files with 30 additions and 12 deletions
--- a/scala/qscript/fullCallingPipeline.q
+++ b/scala/qscript/fullCallingPipeline.q
@ -45,6 +45,9 @@ class fullCallingPipeline extends QScript {
  @Input(doc="SNP cluster filter -- window size",shortName="snpClusterWindow",required=false)
  var snpClusterWindow = 7
  @Input(doc="dbSNP version",shortName="D")
  var dbSNP: File = _           
  trait CommandLineGATKArgs extends CommandLineGATK {
@ -110,7 +113,6 @@ class fullCallingPipeline extends QScript {
    snps.standard_min_confidence_threshold_for_emitting = Some(10)
    snps.min_mapping_quality_score = Some(20)
    snps.min_base_quality_score = Some(20)
    snps.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
    snps.downsample_to_coverage = Some(200)
    snps.annotation :+= "QualByDepthV2"
@ -129,16 +131,32 @@ class fullCallingPipeline extends QScript {
    snps.scatterCount = 50
-    val indels = new UnifiedGenotyper with CommandLineGATKArgs
+    // indel genotyper does one sample at a time
-    indels.input_file = bamFiles
+    val indelCallFiles = List.empty[RodBind]
-    indels.downsampling_type = Some(DownsampleType.EXPERIMENTAL_BY_SAMPLE)
+    val loopNo = 0
-    indels.downsample_to_coverage = Some(200)
+    val priority = ""
-    indels.variants_out = base+".indels.vcf"
+    for ( bam <- bamFiles ) {
-    indels.genotype_model = Some(Model.INDELS)
+      val indel = new IndelGenotyperV2 with CommandLineGATKArgs
-    indels.scatterCount = 50
+      indel.input_file :+= bam
      indel.out = swapExt(bam,".bam",".indels.vcf")
      indel.downsample_to_coverage = Some(500)
      indelCallFiles :+= new RodBind("v"+loopNo.toString, "VCF", indel.out)
      if ( loopNo == 0 ) {
        priority = "v0"
      } else {
        priority += ",v"+loopNo.toString
      }
      loopNo += 1
    }
    val mergeIndels = new CombineVariants with CommandLineGATKArgs
    mergeIndels.out = qscript.project+".indels.vcf"
    mergeIndels.genotypemergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.GenotypeMergeType.REQUIRE_UNIQUE
    mergeIndels.priority = priority
    mergeIndels.variantmergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION
    mergeIndels.rodBind = indelCallFiles
-    // 1b. genomically annotate SNPs -- slow, but scatter it
+    // 1b. genomically annotate SNPs -- no longer slow
    val annotated = new GenomicAnnotator with CommandLineGATKArgs
    annotated.rodBind :+= RodBind("variant", "VCF", new File(snps.variants_out))
    annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.refseqTable)
@ -146,13 +164,12 @@ class fullCallingPipeline extends QScript {
    annotated.vcfOutput = swapExt(new File(snps.variants_out),".vcf",".annotated.vcf").getAbsolutePath
    annotated.select :+= "dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet"
    annotated.rodToIntervalTrackName = "variant"
    annotated.scatterCount = 100
    // 2.a filter on cluster and near indels
    val masker = new VariantFiltration with CommandLineGATKArgs
    masker.rodBind :+= RodBind("variant", "VCF", new File(annotated.vcfOutput))
-    masker.rodBind :+= RodBind("mask", "VCF", new File(indels.variants_out))
+    masker.rodBind :+= RodBind("mask", "VCF", new File(mergeIndels.out))
    masker.maskName = "NearIndel"
    masker.clusterWindowSize = Some(qscript.snpClusterWindow)
    masker.clusterSize = Some(qscript.snpsInCluster)
@ -173,7 +190,8 @@ class fullCallingPipeline extends QScript {
    // todo -- args for resources (properties file)
    val clusters = new GenerateVariantClusters with CommandLineGATKArgs
    clusters.rodBind :+= RodBind("input", "VCF", masker.out)
-    val clusters_clusterFile = swapExt(new File(snps.variants_out),".vcf",".cluster").getAbsolutePath
+    val clusters_clusterFile = swapExt(new File(snps.variants_out),".vcf",".cluster")
    clusters.clusterFile = clusters_clusterFile
    clusters.memoryLimit = Some(8)
    clusters.jobQueue = "hugemem"