From 3ec0e09edd6e6be2a3fdb519b20bbe84c9a72eb9 Mon Sep 17 00:00:00 2001
From: corin <corin@348d0f76-0448-11de-a6fe-93d51630548a>
Date: Fri, 24 Sep 2010 19:28:43 +0000
Subject: [PATCH] ADPR is now included in the full calling pipeline. The most
 up to date version of the ADPR is about to be committed and should be used
 with the script for now. The qscript now calls for two additional strings as
 inputs: the sequencing machines used and the sequencing protocol.  In order
 for ADPR to finish successfully, a squid file for both the lane and sample
 level data needs to be produced, reformatted and named
 <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files
 need to be in the working directory. When database access is ready, this and
 the protocol and sequencer parameters of the r script will go away.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4345 348d0f76-0448-11de-a6fe-93d51630548a
---
 scala/qscript/fullCallingPipeline.q | 52 +++++++++++++++++++++++++----
 1 file changed, 46 insertions(+), 6 deletions(-)
diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q
index c269a7af7..f5321e824 100755
--- a/scala/qscript/fullCallingPipeline.q
+++ b/scala/qscript/fullCallingPipeline.q
@@ -56,6 +56,15 @@ class fullCallingPipeline extends QScript {
   @Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false)
   var num_indel_scatter_jobs = 5
 
+  @Input(doc="ADPR script")
+  var adprScript: File = _
+
+  @Input(doc="Sequencing machine name (for use by adpr)")
+  var machine: String = _
+
+  @Input(doc="Sequencing experiment type (for use by adpr)--Whole_Exome, Whole_Genome, or Hybrid_Selection")
+  var protocol: String = _
+
   private var pipeline: Pipeline = _
 
   trait CommandLineGATKArgs extends CommandLineGATK {
@@ -65,6 +74,8 @@ class fullCallingPipeline extends QScript {
   }
 
 
+
+
   // ------------ SETUP THE PIPELINE ----------- //
 
 
@@ -76,7 +87,9 @@ class fullCallingPipeline extends QScript {
     // there are commands that use all the bam files
     val recalibratedSamples = qscript.pipeline.getSamples
             .filter(_.getBamFiles.contains("recalibrated"))
-
+    val adprRscript = qscript.adprScript // spelling must match the endToEnd(...) call sites below
+    val seq = qscript.machine            // sequencing machine name, forwarded to the ADPR step
+    val expKind = qscript.protocol       // sequencing protocol, forwarded to the ADPR step
     for ( sample <- recalibratedSamples ) {
 
       // put unclean bams in unclean genotypers
@@ -166,12 +179,12 @@ class fullCallingPipeline extends QScript {
             .toList
 
     // actually make calls
-    endToEnd(uncleanedBase,recalibratedBamFiles)
+    endToEnd(uncleanedBase,recalibratedBamFiles, adprRscript, seq, expKind)
     // COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES
-    endToEnd(cleanedBase,cleanBamFiles)
+    endToEnd(cleanedBase,cleanBamFiles, adprRscript, seq, expKind)
   }
 
-  def endToEnd(base: String, bamFiles: List[File]) = {
+  def endToEnd(base: String, bamFiles: List[File], adprthing: File, seqinfo: String, exptype: String) = {
 
     // step through the un-indel-cleaned graph:
     // 1a. call snps and indels
@@ -306,16 +319,43 @@ class fullCallingPipeline extends QScript {
     eval.rodBind :+= RodBind("evalOptimized", "VCF", cut.out)
     eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out)
     eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator")
-    eval.out = new File(base+".eval")
+    eval.reportLocation = new File(base+".eval")
+    eval.reportType = "R"
     eval.analysisName = base+"_VariantEval"
 
     add(snps)
 
+    // 5. Run the ADPR (automated data processing report) R script and make pretty stuff.
+    // Every var is initialized with `= _` so the anonymous class is concrete; values are assigned right below.
+    val adpr = new CommandLineFunction{
+     @Input(doc="Dependent files") var dependents: File = _
+     @Output(doc="Automated Data processing report") var out: File = _
+      var setname: String = _
+      var protocol: String = _
+      var sequencer: String = _
+      var scriptloc: File = _
+      def commandLine = "Rscript %s %s %s %s"
+        .format(scriptloc, setname, protocol, sequencer)
+    }
+
+    adpr.setname = base
+    adpr.scriptloc = adprthing
+    adpr.sequencer = seqinfo
+    adpr.protocol = exptype
+    adpr.dependents = eval.reportLocation
+    adpr.out = new File(base + "_adpr.pdf")
+    adpr.analysisName = base + "_ADPR"
+    // In order for ADPR to finish successfully, a squid file for both the lane and sample level data needs to be
+    // produced, reformatted and named <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files
+    // need to be in the working directory. When database access is ready, this and the protocol and sequencer
+    // parameters of the R script will go away.
+
+
     for ( igv2 <- indelGenotypers ) {
       add(igv2)
     }
 
-    add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval)
+    add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval,adpr)
 
   }