diff --git a/scala/qscript/fullCallingPipeline.q b/scala/qscript/fullCallingPipeline.q index c269a7af7..f5321e824 100755 --- a/scala/qscript/fullCallingPipeline.q +++ b/scala/qscript/fullCallingPipeline.q @@ -56,6 +56,15 @@ class fullCallingPipeline extends QScript { @Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false) var num_indel_scatter_jobs = 5 + @Input(doc="ADPR script") + var adprScript: File = _ + + @Input(doc="Sequencing maching name (for use by adpr)") + var machine: String = _ + + @Input(doc="Sequencing experiement type (for use by adpr)--Whole_Exome, Whole_Genome, or Hybrid_Selection") + var protocol: String = _ + private var pipeline: Pipeline = _ trait CommandLineGATKArgs extends CommandLineGATK { @@ -65,6 +74,8 @@ class fullCallingPipeline extends QScript { } + + // ------------ SETUP THE PIPELINE ----------- // @@ -76,7 +87,9 @@ class fullCallingPipeline extends QScript { // there are commands that use all the bam files val recalibratedSamples = qscript.pipeline.getSamples .filter(_.getBamFiles.contains("recalibrated")) - + val adprRScript = qscript.adprScript + val seq = qscript.machine + val expKind = qscript.protocol for ( sample <- recalibratedSamples ) { // put unclean bams in unclean genotypers @@ -166,12 +179,12 @@ class fullCallingPipeline extends QScript { .toList // actually make calls - endToEnd(uncleanedBase,recalibratedBamFiles) + endToEnd(uncleanedBase,recalibratedBamFiles, adprRscript, seq, expKind) // COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES - endToEnd(cleanedBase,cleanBamFiles) + endToEnd(cleanedBase,cleanBamFiles, adprRscript, seq, expKind) } - def endToEnd(base: String, bamFiles: List[File]) = { + def endToEnd(base: String, bamFiles: List[File], adprthing: File, seqinfo: String, exptype: String) = { // step through the un-indel-cleaned graph: // 1a. call snps and indels @@ -306,16 +319,43 @@ class fullCallingPipeline extends QScript { eval.rodBind :+= RodBind("evalOptimized", "VCF", cut.out) eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out) eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator") - eval.out = new File(base+".eval") + eval.reportLocation = new File(base+".eval") + eval.reportType = "R" eval.analysisName = base+"_VariantEval" add(snps) + // 5. Run the ADPR and make pretty stuff + + val adpr = new CommandLineFunction{ + @Input(doc="Dependent files") var dependents: File = _ + @Output(doc="Automated Data processing report") var out: File = _ + var setname: String + var protocol: String + var sequencer: String + var scriptloc: File + def commandLine = "Rscript %s %s %s %s" + .format(scriptloc, setname, protocol, sequencer) + } + + adpr.setname = base + adpr.scriptloc = adprthing + adpr.sequencer = seqinfo + adpr.protocol = exptype + adpr.dependents = eval.reportLocation + adpr.out = new File(base + "_adpr.pdf") + adpr.analysisName = base + "_ADPR" + //In order for ADPR to finish successfully, a squid file for both the lane and sample level data needs to be + // produced, reformatted and named _lanes.txt or _samps.txt, respectively. These files + // to be in the working directory. When database access is ready, this and the protocol and sequencer parameters of + //the r script will go away. + + for ( igv2 <- indelGenotypers ) { add(igv2) } - add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval) + add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval,adpr) }