ADPR is now included in the full calling pipeline. The most up to date version of the ADPR is about to be committed and should be used with the script for now. The qscript now calls for two additional strings as inputs: the sequencing machines used and the sequencing protocol. In order for ADPR to finish successfully, a squid file for both the lane and sample level data needs to be produced, reformatted and named <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files need to be in the working directory. When database access is ready, this and the protocol and sequencer parameters of the r script will go away.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4345 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-24 19:28:43 +00:00 · 2010-09-24 19:28:43 +00:00 · 3ec0e09edd
parent 0cc48d46ec
commit 3ec0e09edd
1 changed files with 46 additions and 6 deletions
--- a/scala/qscript/fullCallingPipeline.q
+++ b/scala/qscript/fullCallingPipeline.q
@ -56,6 +56,15 @@ class fullCallingPipeline extends QScript {
  @Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false)
  var num_indel_scatter_jobs = 5

+  @Input(doc="ADPR script")
+  var adprScript: File = _
+
+  @Input(doc="Sequencing maching name (for use by adpr)")
+  var machine: String = _
+
+  @Input(doc="Sequencing experiement type (for use by adpr)--Whole_Exome, Whole_Genome, or Hybrid_Selection")
+  var protocol: String = _
+
  private var pipeline: Pipeline = _

  trait CommandLineGATKArgs extends CommandLineGATK {
@ -65,6 +74,8 @@ class fullCallingPipeline extends QScript {
  }


+
+
  // ------------ SETUP THE PIPELINE ----------- //


@ -76,7 +87,9 @@ class fullCallingPipeline extends QScript {
    // there are commands that use all the bam files
    val recalibratedSamples = qscript.pipeline.getSamples
            .filter(_.getBamFiles.contains("recalibrated"))
-
+    val adprRScript = qscript.adprScript
+    val seq = qscript.machine
+    val expKind = qscript.protocol
    for ( sample <- recalibratedSamples ) {

      // put unclean bams in unclean genotypers
@ -166,12 +179,12 @@ class fullCallingPipeline extends QScript {
            .toList

    // actually make calls
-    endToEnd(uncleanedBase,recalibratedBamFiles)
+    endToEnd(uncleanedBase,recalibratedBamFiles, adprRscript, seq, expKind)
    // COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES
-    endToEnd(cleanedBase,cleanBamFiles)
+    endToEnd(cleanedBase,cleanBamFiles, adprRscript, seq, expKind)
  }

-  def endToEnd(base: String, bamFiles: List[File]) = {
+  def endToEnd(base: String, bamFiles: List[File], adprthing: File, seqinfo: String, exptype: String) = {

    // step through the un-indel-cleaned graph:
    // 1a. call snps and indels
@ -306,16 +319,43 @@ class fullCallingPipeline extends QScript {
    eval.rodBind :+= RodBind("evalOptimized", "VCF", cut.out)
    eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out)
    eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator")
-    eval.out = new File(base+".eval")
+    eval.reportLocation = new File(base+".eval")
+    eval.reportType = "R"
    eval.analysisName = base+"_VariantEval"

    add(snps)

+    // 5. Run the ADPR and make pretty stuff
+
+    val adpr = new CommandLineFunction{
+     @Input(doc="Dependent files") var dependents: File = _
+     @Output(doc="Automated Data processing report") var out: File = _
+      var setname: String
+      var protocol: String
+      var sequencer: String
+      var scriptloc: File
+      def commandLine = "Rscript %s %s %s %s"
+        .format(scriptloc, setname, protocol, sequencer)
+    }
+
+    adpr.setname = base
+    adpr.scriptloc = adprthing
+    adpr.sequencer = seqinfo
+    adpr.protocol = exptype
+    adpr.dependents = eval.reportLocation
+    adpr.out = new File(base + "_adpr.pdf")
+    adpr.analysisName = base + "_ADPR"
+    //In order for ADPR to finish successfully, a squid file for both the lane and sample level data needs to be
+    // produced, reformatted and named <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files
+    // to be in the working directory. When database access is ready, this and the protocol and sequencer parameters of
+    //the r script will go away.
+
+
    for ( igv2 <- indelGenotypers ) {
      add(igv2)
    }

-    add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval)
+    add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval,adpr)

  }