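* Added an optional 'no validation' flag (no_validation / nv) to the Data Processing pipeline; when set, the pre- and post-processing BAM validation steps are skipped.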

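* Simplified the Picard wrapper classes: the shared memory-limit and intermediate-file settings now come from a common ExternalCommonArgs trait (which also replaces BWACommonArgs).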
This commit is contained in:
Mauricio Carneiro 2011-07-18 23:30:31 -04:00
parent 4cf7a2af23
commit 2b465ab43b
1 changed file with 29 additions and 27 deletions

View File

@ -72,6 +72,9 @@ class DataProcessingPipeline extends QScript {
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
var bwaThreads: Int = 1 var bwaThreads: Int = 1
@Input(doc="Dont perform validation on the BAM files", fullName="no_validation", shortName="nv", required=false)
var noValidation: Boolean = false
/**************************************************************************** /****************************************************************************
* Global Variables * Global Variables
@ -242,7 +245,12 @@ class DataProcessingPipeline extends QScript {
val preValidateLog = swapExt(bam, ".bam", ".pre.validation") val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
val postValidateLog = swapExt(bam, ".bam", ".post.validation") val postValidateLog = swapExt(bam, ".bam", ".post.validation")
add(validate(bam, preValidateLog)) // Validation is an optional step for the BAM file generated after
// alignment and the final bam file of the pipeline.
if (!noValidation) {
add(validate(bam, preValidateLog),
validate(recalBam, postValidateLog))
}
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
add(target(bam, targetIntervals)) add(target(bam, targetIntervals))
@ -253,8 +261,8 @@ class DataProcessingPipeline extends QScript {
recal(dedupedBam, preRecalFile, recalBam), recal(dedupedBam, preRecalFile, recalBam),
cov(recalBam, postRecalFile), cov(recalBam, postRecalFile),
analyzeCovariates(preRecalFile, preOutPath), analyzeCovariates(preRecalFile, preOutPath),
analyzeCovariates(postRecalFile, postOutPath), analyzeCovariates(postRecalFile, postOutPath))
validate(recalBam, postValidateLog))
cohortList :+= recalBam cohortList :+= recalBam
} }
@ -278,6 +286,13 @@ class DataProcessingPipeline extends QScript {
this.isIntermediate = true this.isIntermediate = true
} }
// General arguments to non-GATK tools
trait ExternalCommonArgs extends CommandLineFunction {
this.memoryLimit = 4
this.isIntermediate = true
}
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs { case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
this.input_file :+= inBams this.input_file :+= inBams
@ -296,8 +311,8 @@ class DataProcessingPipeline extends QScript {
this.targetIntervals = tIntervals this.targetIntervals = tIntervals
this.out = outBam this.out = outBam
this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP) this.rodBind :+= RodBind("dbsnp", "VCF", dbSNP)
if (!indels.isEmpty) if (!qscript.indels.isEmpty)
this.rodBind :+= RodBind("indels", "VCF", indels) this.rodBind :+= RodBind("indels", "VCF", qscript.indels)
this.consensusDeterminationModel = consensusDeterminationModel this.consensusDeterminationModel = consensusDeterminationModel
this.compress = 0 this.compress = 0
this.scatterCount = nContigs this.scatterCount = nContigs
@ -328,7 +343,6 @@ class DataProcessingPipeline extends QScript {
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outBam + ".recalibration" this.analysisName = queueLogDir + outBam + ".recalibration"
this.jobName = queueLogDir + outBam + ".recalibration" this.jobName = queueLogDir + outBam + ".recalibration"
} }
@ -346,48 +360,41 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
} }
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates { case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
this.input = List(inBam) this.input = List(inBam)
this.output = outBam this.output = outBam
this.metrics = metricsFile this.metrics = metricsFile
this.memoryLimit = 6
this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".dedup" this.analysisName = queueLogDir + outBam + ".dedup"
this.jobName = queueLogDir + outBam + ".dedup" this.jobName = queueLogDir + outBam + ".dedup"
} }
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles { case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
this.input = inBams this.input = inBams
this.output = outBam this.output = outBam
this.memoryLimit = 4
this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".joinBams" this.analysisName = queueLogDir + outBam + ".joinBams"
this.jobName = queueLogDir + outBam + ".joinBams" this.jobName = queueLogDir + outBam + ".joinBams"
} }
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam { case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
this.input = List(inSam) this.input = List(inSam)
this.output = outBam this.output = outBam
this.sortOrder = sortOrderP this.sortOrder = sortOrderP
this.memoryLimit = 4
this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".sortSam" this.analysisName = queueLogDir + outBam + ".sortSam"
this.jobName = queueLogDir + outBam + ".sortSam" this.jobName = queueLogDir + outBam + ".sortSam"
} }
case class validate (inBam: File, outLog: File) extends ValidateSamFile { case class validate (inBam: File, outLog: File) extends ValidateSamFile with ExternalCommonArgs {
this.input = List(inBam) this.input = List(inBam)
this.output = outLog this.output = outLog
this.maxRecordsInRam = 100000 this.maxRecordsInRam = 100000
this.REFERENCE_SEQUENCE = qscript.reference this.REFERENCE_SEQUENCE = qscript.reference
this.memoryLimit = 4
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outLog + ".validate" this.analysisName = queueLogDir + outLog + ".validate"
this.jobName = queueLogDir + outLog + ".validate" this.jobName = queueLogDir + outLog + ".validate"
} }
case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups { case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups with ExternalCommonArgs {
this.input = List(inBam) this.input = List(inBam)
this.output = outBam this.output = outBam
this.RGID = readGroup.id this.RGID = readGroup.id
@ -403,12 +410,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".rg" this.jobName = queueLogDir + outBam + ".rg"
} }
trait BWACommonArgs extends CommandLineFunction { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
this.memoryLimit = 4
this.isIntermediate = true
}
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file") var sai = outSai @Output(doc="output sai file") var sai = outSai
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai
@ -416,7 +418,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se"
} }
case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file for 1st mating pair") var sai = outSai1 @Output(doc="output sai file for 1st mating pair") var sai = outSai1
def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai
@ -424,7 +426,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
} }
case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with BWACommonArgs { case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file") var sai = inSai @Input(doc="bwa alignment index file") var sai = inSai
@Output(doc="output aligned bam file") var alignedBam = outBam @Output(doc="output aligned bam file") var alignedBam = outBam
@ -433,7 +435,7 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".bwa_sam_se" this.jobName = queueLogDir + outBam + ".bwa_sam_se"
} }
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with BWACommonArgs { case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1 @Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2