Now only accepts an intervals file if the user specifically requests that BAMs be output at the intervals only.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5291 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
carneiro 2011-02-23 16:49:58 +00:00
parent ecfb51bcd8
commit 2a48ec1307
1 changed files with 51 additions and 49 deletions

View File

@ -25,13 +25,13 @@ class dataProcessing extends QScript {
var input: String = _ var input: String = _
@Input(doc="Reference fasta file", shortName="R", required=false) @Input(doc="Reference fasta file", shortName="R", required=false)
var reference: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta") var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19")
@Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) // todo -- accept any format. Not only VCF. @Input(doc="dbsnp ROD to use (VCF)", shortName="D", required=false) // todo -- accept any format. Not only VCF.
val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf") val dbSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf")
@Input(doc="extra VCF files to use as reference indels for Indel Realignment", shortName="indels", required=false) //todo -- once vcfs are merged, this will become the only indel vcf to be used and the merged file will be the default. @Input(doc="extra VCF files to use as reference indels for Indel Realignment", shortName="indels", required=false) //todo -- once vcfs are merged, this will become the only indel vcf to be used and the merged file will be the default.
val indels: File = _ val indels: File = null
@Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", shortName="p", required=false) @Input(doc="the project name determines the final output (BAM file) base name. Example NA12878 yields NA12878.processed.bam", shortName="p", required=false)
var projectName: String = "combined" var projectName: String = "combined"
@ -39,12 +39,11 @@ class dataProcessing extends QScript {
@Input(doc="output path", shortName="outputDir", required=false) @Input(doc="output path", shortName="outputDir", required=false)
var outputDir: String = "" var outputDir: String = ""
@Input(doc="the -L interval string to be used by GATK", shortName="L", required=false) @Input(doc="the -L interval string to be used by GATK - output bams at interval only", shortName="L", required=false)
var intervalString: String = "" var intervalString: String = ""
// todo -- this shouldn't be allowed. We want a flag that says "output bams at intervals only" or not @Input(doc="output bams at intervals only", shortName="intervals", required=false)
@Input(doc="provide a .intervals file with the list of target intervals", shortName="intervals", required=false) var intervals: File = _
var intervals: File = new File("/humgen/1kg/processing/pipeline_test_bams/whole_genome_chunked.hg19.intervals")
// todo -- let's create a pre-merged single VCF and put it into /humgen/gsa-hpprojects/GATK/data please // todo -- let's create a pre-merged single VCF and put it into /humgen/gsa-hpprojects/GATK/data please
@ -71,6 +70,22 @@ class dataProcessing extends QScript {
var perLaneBamList: List[String] = Nil var perLaneBamList: List[String] = Nil
var recalibratedBamList: List[File] = Nil var recalibratedBamList: List[File] = Nil
// Helpful variables
val outName: String = qscript.projectName
val outDir: String = qscript.outputDir
// BAM files generated by the pipeline
val bamList: String = outDir + outName + ".list"
val cleanedBam: String = outDir + outName + ".clean.bam"
val fixedBam: String = outDir + outName + ".processed.bam"
// Accessory files
val knownTargetIntervals: String = outDir + outName + ".known_indels.intervals"
val allTargetIntervals: String = outDir + outName + ".all_indels.intervals"
add(new knownTargets(knownTargetIntervals))
// Populates the list of per lane bam files to process (single bam or list of bams). // Populates the list of per lane bam files to process (single bam or list of bams).
if (input.endsWith("bam")) if (input.endsWith("bam"))
perLaneBamList :+= input perLaneBamList :+= input
@ -78,9 +93,6 @@ class dataProcessing extends QScript {
for (bam <- Source.fromFile(input).getLines()) for (bam <- Source.fromFile(input).getLines())
perLaneBamList :+= bam perLaneBamList :+= bam
perLaneBamList.foreach { perLaneBam => perLaneBamList.foreach { perLaneBam =>
// Helpful variables // Helpful variables
@ -93,15 +105,13 @@ class dataProcessing extends QScript {
val recalBam: String = baseName + ".clean.dedup.recal.bam" val recalBam: String = baseName + ".clean.dedup.recal.bam"
// Accessory files // Accessory files
val targetIntervals: String = baseName + ".indel.intervals"
val metricsFile: String = baseName + ".metrics" val metricsFile: String = baseName + ".metrics"
val preRecalFile: String = baseName + ".pre_recal.csv" val preRecalFile: String = baseName + ".pre_recal.csv"
val postRecalFile: String = baseName + ".post_recal.csv" val postRecalFile: String = baseName + ".post_recal.csv"
val preOutPath: String = baseName + ".pre" val preOutPath: String = baseName + ".pre"
val postOutPath: String = baseName + ".post" val postOutPath: String = baseName + ".post"
add(new target(perLaneBam, targetIntervals), add(new clean(perLaneBam, knownTargetIntervals, cleanedBam, knownsOnly, intermediate),
new clean(perLaneBam, targetIntervals, cleanedBam, knownsOnly, intermediate),
new dedup(cleanedBam, dedupedBam, metricsFile), new dedup(cleanedBam, dedupedBam, metricsFile),
new cov(dedupedBam, preRecalFile), new cov(dedupedBam, preRecalFile),
new recal(dedupedBam, preRecalFile, recalBam), new recal(dedupedBam, preRecalFile, recalBam),
@ -112,24 +122,23 @@ class dataProcessing extends QScript {
recalibratedBamList :+= new File(recalBam) recalibratedBamList :+= new File(recalBam)
} }
// Helpful variables
val outName: String = qscript.projectName
val outDir: String = qscript.outputDir
// BAM files generated by the pipeline
val bamList: String = outDir + outName + ".list"
val cleanedBam: String = outDir + outName + ".clean.bam"
val fixedBam: String = outDir + outName + ".processed.bam"
// Accessory files
val targetIntervals: String = outDir + outName + ".indel.intervals"
add(new writeList(recalibratedBamList, bamList), add(new writeList(recalibratedBamList, bamList),
new target(bamList, targetIntervals), // todo -- reuse previously generated intervals (see how to do that) new allTargets(bamList, allTargetIntervals),
new clean(bamList, targetIntervals, cleanedBam, !knownsOnly, !intermediate)) new clean(bamList, allTargetIntervals, cleanedBam, !knownsOnly, !intermediate))
} }
class target (inBams: String, outIntervals: String) extends RealignerTargetCreator with CommandLineGATKArgs { class knownTargets (outIntervals: String) extends RealignerTargetCreator with CommandLineGATKArgs {
this.out = new File(outIntervals)
this.mismatchFraction = Some(0.0)
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.rodBind :+= RodBind("indels1", "VCF", dindelPilotCalls)
this.rodBind :+= RodBind("indels2", "VCF", dindelAFRCalls)
this.rodBind :+= RodBind("indels3", "VCF", dindelEURCalls)
this.rodBind :+= RodBind("indels4", "VCF", dindelASNCalls)
}
class allTargets (inBams: String, outIntervals: String) extends knownTargets(outIntervals) {
this.input_file :+= new File(inBams) this.input_file :+= new File(inBams)
this.out = new File(outIntervals) this.out = new File(outIntervals)
this.mismatchFraction = Some(0.0) this.mismatchFraction = Some(0.0)
@ -139,17 +148,13 @@ class dataProcessing extends QScript {
this.rodBind :+= RodBind("indels3", "VCF", dindelEURCalls) this.rodBind :+= RodBind("indels3", "VCF", dindelEURCalls)
this.rodBind :+= RodBind("indels4", "VCF", dindelASNCalls) this.rodBind :+= RodBind("indels4", "VCF", dindelASNCalls)
if (qscript.indels != null) this.rodBind :+= RodBind("indels5", "VCF", qscript.indels) if (qscript.indels != null) this.rodBind :+= RodBind("indels5", "VCF", qscript.indels)
this.jobName = inBams + ".tgt" this.jobName = outIntervals + ".target"
if (!qscript.intervalString.isEmpty()) this.intervalsString :+= qscript.intervalString
else this.intervals :+= qscript.intervals
} }
class clean (inBams: String, tIntervals: String, outBam: String, knownsOnly: Boolean, intermediate: Boolean) extends IndelRealigner with CommandLineGATKArgs { class clean (inBams: String, tIntervals: String, outBam: String, knownsOnly: Boolean, intermediate: Boolean) extends IndelRealigner with CommandLineGATKArgs {
this.input_file :+= new File(inBams) this.input_file :+= new File(inBams)
this.targetIntervals = new File(tIntervals) this.targetIntervals = new File(tIntervals)
this.out = new File(outBam) this.out = new File(outBam)
this.doNotUseSW = true
this.baq = Some(org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY)
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
this.rodBind :+= RodBind("indels1", "VCF", dindelPilotCalls) this.rodBind :+= RodBind("indels1", "VCF", dindelPilotCalls)
this.rodBind :+= RodBind("indels2", "VCF", dindelAFRCalls) this.rodBind :+= RodBind("indels2", "VCF", dindelAFRCalls)
@ -159,27 +164,20 @@ class dataProcessing extends QScript {
this.useOnlyKnownIndels = knownsOnly this.useOnlyKnownIndels = knownsOnly
this.sortInCoordinateOrderEvenThoughItIsHighlyUnsafe = true this.sortInCoordinateOrderEvenThoughItIsHighlyUnsafe = true
this.constrainMovement = true this.constrainMovement = true
this.doNotUseSW = true
this.baq = Some(org.broadinstitute.sting.utils.baq.BAQ.CalculationMode.CALCULATE_AS_NECESSARY)
this.compress = Some(0)
this.isIntermediate = intermediate this.isIntermediate = intermediate
this.jobName = inBams + ".clean" this.jobName = outBam + ".clean"
if (!qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString) if (!intermediate && !qscript.intervalString.isEmpty()) this.intervalsString ++= List(qscript.intervalString)
else this.intervals :+= qscript.intervals if (!intermediate && qscript.intervals != null) this.intervals :+= qscript.intervals
}
class fixMates (inBam: String, outBam: String, intermediate: Boolean) extends PicardBamJarFunction {
@Input(doc="cleaned bam") var cleaned: File = new File(inBam)
@Output(doc="fixed bam") var fixed: File = new File(outBam)
override def inputBams = List(cleaned)
override def outputBam = fixed
override def commandLine = super.commandLine + " CREATE_INDEX=true"
this.jarFile = qscript.fixMatesJar
this.isIntermediate = intermediate
this.memoryLimit = Some(6)
this.jobName = inBam + ".fix"
} }
class dedup (inBam: String, outBam: String, metricsFile: String) extends PicardBamJarFunction { class dedup (inBam: String, outBam: String, metricsFile: String) extends PicardBamJarFunction {
@Input(doc="fixed bam") var clean: File = new File(inBam) @Input(doc="fixed bam") var clean: File = new File(inBam)
@Output(doc="deduped bam") var deduped: File = new File(outBam) @Output(doc="deduped bam") var deduped: File = new File(outBam)
@Output(doc="deduped bam index") var dedupedIndex: File = new File(outBam + ".bai")
@Output(doc="metrics file") var metrics: File = new File(metricsFile)
override def inputBams = List(clean) override def inputBams = List(clean)
override def outputBam = deduped override def outputBam = deduped
override def commandLine = super.commandLine + " M=" + metricsFile + " CREATE_INDEX=true" override def commandLine = super.commandLine + " M=" + metricsFile + " CREATE_INDEX=true"
@ -187,7 +185,7 @@ class dataProcessing extends QScript {
this.memoryLimit = Some(6) this.memoryLimit = Some(6)
this.jarFile = qscript.dedupJar this.jarFile = qscript.dedupJar
this.isIntermediate = true this.isIntermediate = true
this.jobName = inBam + ".dedup" this.jobName = outBam + ".dedup"
} }
class cov (inBam: String, outRecalFile: String) extends CountCovariates with CommandLineGATKArgs { class cov (inBam: String, outRecalFile: String) extends CountCovariates with CommandLineGATKArgs {
@ -195,13 +193,16 @@ class dataProcessing extends QScript {
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= new File(inBam) this.input_file :+= new File(inBam)
this.recal_file = new File(outRecalFile) this.recal_file = new File(outRecalFile)
this.jobName = outRecalFile + ".covariates"
} }
class recal (inBam: String, inRecalFile: String, outBam: String) extends TableRecalibration with CommandLineGATKArgs { class recal (inBam: String, inRecalFile: String, outBam: String) extends TableRecalibration with CommandLineGATKArgs {
@Output(doc="recalibrated bam index") var recalIndex: File = new File(outBam + ".bai")
this.input_file :+= new File (inBam) this.input_file :+= new File (inBam)
this.recal_file = new File(inRecalFile) this.recal_file = new File(inRecalFile)
this.out = new File(outBam) this.out = new File(outBam)
this.index_output_bam_on_the_fly = Some(true) this.index_output_bam_on_the_fly = Some(true)
this.jobName = outBam + ".recalibration"
} }
class analyzeCovariates (inRecalFile: String, outPath: String) extends AnalyzeCovariates { class analyzeCovariates (inRecalFile: String, outPath: String) extends AnalyzeCovariates {
@ -209,11 +210,12 @@ class dataProcessing extends QScript {
this.resources = qscript.R this.resources = qscript.R
this.recal_file = new File(inRecalFile) this.recal_file = new File(inRecalFile)
this.output_dir = outPath this.output_dir = outPath
this.jobName = inRecalFile + ".analyze_covariates"
} }
class writeList(inBams: List[File], outBamList: String) extends ListWriterFunction { class writeList(inBams: List[File], outBamList: String) extends ListWriterFunction {
this.inputFiles = inBams this.inputFiles = inBams
this.listFile = new File(outBamList) this.listFile = new File(outBamList)
this.jobName = "bamList" this.jobName = outBamList + ".bamList"
} }
} }