ADPR is now included in the full calling pipeline. The most up-to-date version of the ADPR is about to be committed and should be used with the script for now. The qscript now calls for two additional strings as inputs: the sequencing machines used and the sequencing protocol. In order for ADPR to finish successfully, a squid file for both the lane- and sample-level data needs to be produced, reformatted and named <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files need to be in the working directory. When database access is ready, this and the protocol and sequencer parameters of the R script will go away.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4345 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
corin 2010-09-24 19:28:43 +00:00
parent 0cc48d46ec
commit 3ec0e09edd
1 changed files with 46 additions and 6 deletions

View File

@ -56,6 +56,15 @@ class fullCallingPipeline extends QScript {
@Input(doc="Number of jobs to scatter indel genotyper",shortName="indelScatter",required=false)
var num_indel_scatter_jobs = 5
// R script that generates the ADPR; handed to endToEnd together with machine and protocol.
@Input(doc="ADPR script")
var adprScript: File = _
// Free-text sequencer name forwarded to the ADPR script.
@Input(doc="Sequencing machine name (for use by adpr)")
var machine: String = _
// Experiment type forwarded to the ADPR script; the doc string lists the expected values.
@Input(doc="Sequencing experiment type (for use by adpr)--Whole_Exome, Whole_Genome, or Hybrid_Selection")
var protocol: String = _
private var pipeline: Pipeline = _
trait CommandLineGATKArgs extends CommandLineGATK {
@ -65,6 +74,8 @@ class fullCallingPipeline extends QScript {
}
// ------------ SETUP THE PIPELINE ----------- //
@ -76,7 +87,9 @@ class fullCallingPipeline extends QScript {
// there are commands that use all the bam files
val recalibratedSamples = qscript.pipeline.getSamples
.filter(_.getBamFiles.contains("recalibrated"))
// Capture the ADPR inputs once so they can be passed to each endToEnd call.
// The identifier must be spelled adprRscript (lower-case "s"): that is the name the
// endToEnd(...) calls reference, and Scala identifiers are case-sensitive.
val adprRscript = qscript.adprScript
val seq = qscript.machine
val expKind = qscript.protocol
for ( sample <- recalibratedSamples ) {
// put unclean bams in unclean genotypers
@ -166,12 +179,12 @@ class fullCallingPipeline extends QScript {
.toList
// actually make calls
endToEnd(uncleanedBase,recalibratedBamFiles)
endToEnd(uncleanedBase,recalibratedBamFiles, adprRscript, seq, expKind)
// COMMENT THIS NEXT LINE TO AVOID CALLING ON CLEANED FILES
endToEnd(cleanedBase,cleanBamFiles)
endToEnd(cleanedBase,cleanBamFiles, adprRscript, seq, expKind)
}
def endToEnd(base: String, bamFiles: List[File]) = {
def endToEnd(base: String, bamFiles: List[File], adprthing: File, seqinfo: String, exptype: String) = {
// step through the un-indel-cleaned graph:
// 1a. call snps and indels
@ -306,16 +319,43 @@ class fullCallingPipeline extends QScript {
eval.rodBind :+= RodBind("evalOptimized", "VCF", cut.out)
eval.rodBind :+= RodBind("evalHandFiltered", "VCF", handFilter.out)
eval.evalModule ++= List("CountFunctionalClasses", "CompOverlap", "CountVariants", "TiTvVariantEvaluator")
eval.out = new File(base+".eval")
eval.reportLocation = new File(base+".eval")
eval.reportType = "R"
eval.analysisName = base+"_VariantEval"
add(snps)
// 5. Run the ADPR and make pretty stuff
// Job that runs the ADPR R script through Rscript to build the automated data processing report.
val adpr = new CommandLineFunction {
  // NOTE(review): presumably declared as @Input so Queue orders this job after the file
  // it is set to has been produced -- confirm against Queue's dependency handling.
  @Input(doc="Dependent files") var dependents: File = _
  @Output(doc="Automated Data processing report") var out: File = _
  // Each field needs the `= _` default initializer: a var without one is an abstract
  // declaration, and an anonymous class with abstract members cannot be instantiated.
  var setname: String = _
  var protocol: String = _
  var sequencer: String = _
  var scriptloc: File = _
  // Positional arguments passed to the script: set name, protocol, sequencer.
  def commandLine = "Rscript %s %s %s %s"
    .format(scriptloc, setname, protocol, sequencer)
}
adpr.setname = base
adpr.scriptloc = adprthing
adpr.sequencer = seqinfo
adpr.protocol = exptype
adpr.dependents = eval.reportLocation
adpr.out = new File(base + "_adpr.pdf")
adpr.analysisName = base + "_ADPR"
// In order for ADPR to finish successfully, a squid file for both the lane- and sample-level data needs to be
// produced, reformatted and named <projectBase>_lanes.txt or <projectBase>_samps.txt, respectively. These files
// need to be in the working directory. When database access is ready, this and the protocol and sequencer
// parameters of the R script will go away.
for ( igv2 <- indelGenotypers ) {
add(igv2)
}
add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval)
add(mergeIndels,annotated,masker,handFilter,clusters,recalibrate,cut,eval,adpr)
}