script to test BQSR on good/bad regions
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5743 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e5cc0f4eec
commit
e2b9227d8d
|
|
@ -0,0 +1,116 @@
|
|||
package oneoffs.carneiro
|
||||
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 4/20/11
|
||||
* Time: 16:29 PM
|
||||
*/
|
||||
|
||||
|
||||
class justRecalibrate extends QScript {
|
||||
|
||||
@Input(doc="path to GenomeAnalysisTK.jar", shortName="gatk", required=true)
|
||||
var GATKjar: File = _
|
||||
|
||||
@Input(doc="input BAM file - or list of BAM files", shortName="i", required=true)
|
||||
var input: File = _
|
||||
|
||||
@Input(doc="path to AnalyzeCovariates.jar", fullName="path_to_ac_jar", shortName="ac", required=false)
|
||||
var ACJar: File = new File("/humgen/gsa-scr1/carneiro/stable/dist/AnalyzeCovariates.jar")
|
||||
|
||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=false)
|
||||
var R: String = new File("/humgen/gsa-scr1/carneiro/stable/R")
|
||||
|
||||
@Input(doc="bad regions interval", shortName="bad", required=false)
|
||||
var badInterval: File = new File("/humgen/gsa-hpprojects/dev/carneiro/goodbad/data/bad_regions.hg19.intervals")
|
||||
|
||||
@Input(doc="Reference fasta file", shortName="R", required=false)
|
||||
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
|
||||
|
||||
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false)
|
||||
var dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
|
||||
|
||||
val queueLogDir: String = ".qlog/"
|
||||
|
||||
|
||||
def script = {
|
||||
|
||||
val first: Boolean = true
|
||||
val bad: Boolean = true
|
||||
|
||||
val badRecalFile1: File = new File("bad_recal1.csv");
|
||||
val badRecalFile2: File = new File("bad_recal2.csv");
|
||||
val badBam: File = new File("bad.bam");
|
||||
val badPath1: String = "bad1";
|
||||
val badPath2: String = "bad2";
|
||||
|
||||
val goodRecalFile1: File = new File("good_recal1.csv")
|
||||
val goodRecalFile2: File = new File("good_recal2.csv")
|
||||
val goodBam: File = new File("good.bam")
|
||||
val goodPath1: String = "good1"
|
||||
val goodPath2: String = "good2"
|
||||
|
||||
add(cov(input, badRecalFile1, first, bad),
|
||||
recal(input, badRecalFile1, badBam, bad),
|
||||
cov(badBam, badRecalFile2, !first, bad),
|
||||
analyzeCovariates(badRecalFile1, badPath1),
|
||||
analyzeCovariates(badRecalFile2, badPath2))
|
||||
|
||||
add(cov(input, goodRecalFile1, first, !bad),
|
||||
recal(input, goodRecalFile1, goodBam, !bad),
|
||||
cov(goodBam, goodRecalFile2, !first, !bad),
|
||||
analyzeCovariates(goodRecalFile1, goodPath1),
|
||||
analyzeCovariates(goodRecalFile2, goodPath2))
|
||||
}
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.jarFile = GATKjar
|
||||
this.reference_sequence = reference
|
||||
this.memoryLimit = 4
|
||||
this.isIntermediate = true
|
||||
}
|
||||
|
||||
case class cov (inBam: File, outRecalFile: File, FIRST: Boolean, BAD: Boolean) extends CountCovariates with CommandLineGATKArgs {
|
||||
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
|
||||
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = outRecalFile
|
||||
this.useOriginalQualities = FIRST
|
||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||
if (BAD) {
|
||||
this.intervals :+= badInterval
|
||||
this.scatterCount = 85
|
||||
}
|
||||
else
|
||||
this.excludeIntervals :+= badInterval
|
||||
}
|
||||
|
||||
case class recal (inBam: File, inRecalFile: File, outBam: File, BAD: Boolean) extends TableRecalibration with CommandLineGATKArgs {
|
||||
this.input_file :+= inBam
|
||||
this.recal_file = inRecalFile
|
||||
this.out = outBam
|
||||
this.isIntermediate = false
|
||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||
if (BAD) {
|
||||
this.intervals :+= badInterval
|
||||
this.scatterCount = 85
|
||||
}
|
||||
else
|
||||
this.excludeIntervals :+= badInterval
|
||||
}
|
||||
|
||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||
this.jarFile = ACJar
|
||||
this.resources = R
|
||||
this.recal_file = inRecalFile
|
||||
this.output_dir = outPath.toString
|
||||
this.analysisName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue