Added FCP VE stratifications for Filter, FunctionalClass, and Stratification as requested by Corin.
Feeding FCP UG the bam list instead of individual bams to cut scatter gather time from O(m^100) as measured by Chris to O(m^1). Fixed NPE when eval values aren't found in PipelineTests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5694 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f3dacd3c40
commit
6b1b4931e7
|
|
@ -1,10 +1,30 @@
|
|||
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction
|
||||
import org.broadinstitute.sting.queue.extensions.samtools._
|
||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||
import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, CloneFunction, ScatterFunction}
|
||||
import org.broadinstitute.sting.queue.library.ipf.intervals.ExpandIntervals
|
||||
import org.broadinstitute.sting.queue.QScript
|
||||
import collection.JavaConversions._
|
||||
|
|
@ -16,148 +36,47 @@ class FullCallingPipeline extends QScript {
|
|||
@Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y")
|
||||
var yamlFile: File = _
|
||||
|
||||
@Input(doc="path to GATK jar", shortName="G", required=false)
|
||||
var gatkJar: File = _
|
||||
@Input(doc="level of parallelism for UnifiedGenotyper (both for SNPs and indels). By default is set to 20.", shortName="varScatter", required=false)
|
||||
var variantCallerScatterCount = 20
|
||||
|
||||
@Input(doc="level of parallelism for IndelRealigner. By default is set to 1.", shortName="cleanerScatter", required=false)
|
||||
var num_cleaner_scatter_jobs = 1
|
||||
|
||||
@Input(doc="level of parallelism for UnifiedGenotyper (both for SNPs and indels). By default is set to 20.", shortName="varScatter", required=false)
|
||||
var num_var_scatter_jobs = 20
|
||||
@Argument(doc="memory limit for UnifiedGenotyper (both for SNPs and indels). By default is set to 4g.", shortName="varMemory", required=false)
|
||||
var variantCallerMemory = 4
|
||||
|
||||
@Argument(doc="expand each target in input intervals by the specified number of bases (50 bases by default)", shortName="expand", required=false)
|
||||
var expandIntervals = 50
|
||||
|
||||
@Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false)
|
||||
var skip_cleaning = false
|
||||
|
||||
@Input(doc="ADPR script", shortName ="tearScript", required=false)
|
||||
var tearScript: File = _
|
||||
|
||||
private var pipeline: Pipeline = _
|
||||
|
||||
private final val picardFixMatesClass = "net.sf.picard.sam.FixMateInformation"
|
||||
|
||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||
this.intervals = List(qscript.pipeline.getProject.getIntervalList)
|
||||
this.jarFile = qscript.gatkJar
|
||||
this.reference_sequence = qscript.pipeline.getProject.getReferenceFile
|
||||
this.intervals = List(qscript.pipeline.getProject.getIntervalList)
|
||||
this.memoryLimit = 4
|
||||
}
|
||||
|
||||
|
||||
// ------------ SETUP THE PIPELINE ----------- //
|
||||
|
||||
|
||||
def script() {
|
||||
pipeline = PicardPipeline.parse(qscript.yamlFile)
|
||||
|
||||
val projectBase: String = qscript.pipeline.getProject.getName
|
||||
val base = projectBase + ".cleaned"
|
||||
val bamType = "cleaned"
|
||||
|
||||
if (qscript.skip_cleaning) {
|
||||
endToEnd(projectBase + ".uncleaned", "recalibrated")
|
||||
} else {
|
||||
val recalibratedSamples = qscript.pipeline.getSamples.filter(_.getBamFiles.contains("recalibrated"))
|
||||
// Make the bam list
|
||||
val writeBamList = new ListWriterFunction
|
||||
writeBamList.analysisName = base + "_BamList"
|
||||
writeBamList.jobOutputFile = ".queue/logs/Overall/WriteBamList.out"
|
||||
writeBamList.inputFiles = qscript.pipeline.getSamples.filter(_.getBamFiles.contains(bamType)).map(_.getBamFiles.get(bamType)).toList
|
||||
writeBamList.listFile = "Resources/" + base +".bamfiles.list"
|
||||
add(writeBamList)
|
||||
|
||||
for ( sample <- recalibratedSamples ) {
|
||||
val sampleId = sample.getId
|
||||
// put unclean bams in unclean genotypers in advance, create the extension files
|
||||
val bam = sample.getBamFiles.get("recalibrated")
|
||||
if (!sample.getBamFiles.contains("cleaned")) {
|
||||
sample.getBamFiles.put("cleaned", swapExt("CleanedBams", bam,"bam","cleaned.bam"))
|
||||
}
|
||||
|
||||
val cleaned_bam = sample.getBamFiles.get("cleaned")
|
||||
val indel_targets = swapExt("CleanedBams/IntermediateFiles/"+sampleId, bam,"bam","realigner_targets.interval_list")
|
||||
|
||||
// create the cleaning commands
|
||||
val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs
|
||||
targetCreator.jobOutputFile = new File(".queue/logs/Cleaning/%s/RealignerTargetCreator.out".format(sampleId))
|
||||
targetCreator.jobName = "CreateTargets_"+sampleId
|
||||
targetCreator.analysisName = "CreateTargets_"+sampleId
|
||||
targetCreator.input_file :+= bam
|
||||
targetCreator.out = indel_targets
|
||||
targetCreator.memoryLimit = 2
|
||||
targetCreator.isIntermediate = true
|
||||
|
||||
val realigner = new IndelRealigner with CommandLineGATKArgs
|
||||
realigner.jobOutputFile = new File(".queue/logs/Cleaning/%s/IndelRealigner.out".format(sampleId))
|
||||
realigner.analysisName = "RealignBam_"+sampleId
|
||||
realigner.input_file = targetCreator.input_file
|
||||
realigner.targetIntervals = targetCreator.out
|
||||
realigner.intervals = Nil
|
||||
realigner.intervalsString = Nil
|
||||
realigner.scatterCount = num_cleaner_scatter_jobs
|
||||
realigner.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp)
|
||||
realigner.rodBind :+= RodBind("indels", "VCF", swapExt(realigner.reference_sequence.getParentFile, realigner.reference_sequence, "fasta", "1kg_pilot_indels.vcf"))
|
||||
|
||||
// if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates
|
||||
if (realigner.scatterCount > 1) {
|
||||
realigner.out = cleaned_bam
|
||||
realigner.setupScatterFunction = {
|
||||
case scatter: ScatterFunction =>
|
||||
scatter.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather".format(sampleId))
|
||||
scatter.jobOutputFile = new File(".queue/logs/Cleaning/%s/Scatter.out".format(sampleId))
|
||||
}
|
||||
realigner.setupCloneFunction = {
|
||||
case (clone: CloneFunction, index: Int) =>
|
||||
clone.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Scatter_%s".format(sampleId, index))
|
||||
clone.jobOutputFile = new File(".queue/logs/Cleaning/%s/Scatter_%s.out".format(sampleId, index))
|
||||
}
|
||||
realigner.setupGatherFunction = {
|
||||
case (gather: BamGatherFunction, source: ArgumentSource) =>
|
||||
gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName))
|
||||
gather.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
|
||||
gather.memoryLimit = 6
|
||||
gather.javaMainClass = picardFixMatesClass
|
||||
gather.assumeSorted = None
|
||||
case (gather: GatherFunction, source: ArgumentSource) =>
|
||||
gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName))
|
||||
gather.jobOutputFile = new File(".queue/logs/Cleaning/%s/Gather_%s.out".format(sampleId, source.field.getName))
|
||||
}
|
||||
|
||||
add(targetCreator,realigner)
|
||||
} else {
|
||||
realigner.out = swapExt("CleanedBams/IntermediateFiles/"+sampleId,bam,"bam","unfixed.cleaned.bam")
|
||||
realigner.isIntermediate = true
|
||||
|
||||
// Explicitly run fix mates if the function won't be scattered.
|
||||
val fixMates = new PicardBamFunction {
|
||||
@Input(doc="unfixed bam") var unfixed: File = _
|
||||
@Output(doc="fixed bam") var fixed: File = _
|
||||
def inputBams = List(unfixed)
|
||||
def outputBam = fixed
|
||||
}
|
||||
|
||||
fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
|
||||
fixMates.memoryLimit = 6
|
||||
fixMates.javaMainClass = picardFixMatesClass
|
||||
fixMates.unfixed = realigner.out
|
||||
fixMates.fixed = cleaned_bam
|
||||
fixMates.analysisName = "FixMates_"+sampleId
|
||||
|
||||
// Add the fix mates explicitly
|
||||
add(targetCreator,realigner,fixMates)
|
||||
}
|
||||
|
||||
var samtoolsindex = new SamtoolsIndexFunction
|
||||
samtoolsindex.jobOutputFile = new File(".queue/logs/Cleaning/%s/SamtoolsIndex.out".format(sampleId))
|
||||
samtoolsindex.bamFile = cleaned_bam
|
||||
samtoolsindex.analysisName = "index_cleaned_"+sampleId
|
||||
//samtoolsindex.jobQueue = qscript.short_job_queue
|
||||
add(samtoolsindex)
|
||||
}
|
||||
|
||||
endToEnd(projectBase + ".cleaned", "cleaned")
|
||||
}
|
||||
}
|
||||
|
||||
def endToEnd(base: String, bamType: String) = {
|
||||
val samples = qscript.pipeline.getSamples.filter(_.getBamFiles.contains(bamType)).toList
|
||||
val bamFiles = samples.map(_.getBamFiles.get(bamType))
|
||||
|
||||
val ei : ExpandIntervals = new ExpandIntervals(qscript.pipeline.getProject.getIntervalList, 1, qscript.expandIntervals, new File("Resources", base + ".flanks.interval_list"), qscript.pipeline.getProject.getReferenceFile, "INTERVALS", "INTERVALS")
|
||||
ei.jobOutputFile = new File(".queue/logs/Overall/ExpandIntervals.out")
|
||||
val ei = new ExpandIntervals(
|
||||
qscript.pipeline.getProject.getIntervalList,
|
||||
1,
|
||||
qscript.expandIntervals,
|
||||
"Resources/" + base + ".flanks.interval_list",
|
||||
qscript.pipeline.getProject.getReferenceFile,
|
||||
"INTERVALS",
|
||||
"INTERVALS")
|
||||
ei.jobOutputFile = ".queue/logs/Overall/ExpandIntervals.out"
|
||||
|
||||
if (qscript.expandIntervals > 0) {
|
||||
add(ei)
|
||||
|
|
@ -172,73 +91,43 @@ class FullCallingPipeline extends QScript {
|
|||
// Call indels
|
||||
val indels = new UnifiedGenotyper with CommandLineGATKArgs with ExpandedIntervals
|
||||
indels.analysisName = base + "_indels"
|
||||
indels.jobOutputFile = new File(".queue/logs/IndelCalling/UnifiedGenotyper.indels.out")
|
||||
indels.memoryLimit = 6
|
||||
indels.jobOutputFile = ".queue/logs/IndelCalling/UnifiedGenotyper.indels.out"
|
||||
indels.downsample_to_coverage = 600
|
||||
indels.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.INDEL
|
||||
indels.input_file = bamFiles
|
||||
indels.input_file = List(writeBamList.listFile)
|
||||
indels.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp)
|
||||
indels.out = new File("IndelCalls", base+".indels.vcf")
|
||||
|
||||
indels.scatterCount = qscript.num_var_scatter_jobs
|
||||
indels.setupScatterFunction = {
|
||||
case scatter: ScatterFunction =>
|
||||
scatter.commandDirectory = new File("IndelCalls/ScatterGather")
|
||||
scatter.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Scatter.out")
|
||||
}
|
||||
indels.setupCloneFunction = {
|
||||
case (clone: CloneFunction, index: Int) =>
|
||||
clone.commandDirectory = new File("IndelCalls/ScatterGather/Scatter_%s".format(index))
|
||||
clone.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Scatter_%s.out".format(index))
|
||||
}
|
||||
indels.setupGatherFunction = {
|
||||
case (gather: GatherFunction, source: ArgumentSource) =>
|
||||
gather.commandDirectory = new File("IndelCalls/ScatterGather/Gather_%s".format(source.field.getName))
|
||||
gather.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Gather_%s.out".format(source.field.getName))
|
||||
}
|
||||
indels.out = "IndelCalls/" + base+".indels.vcf"
|
||||
indels.scatterCount = qscript.variantCallerScatterCount
|
||||
indels.memoryLimit = qscript.variantCallerMemory
|
||||
add(indels)
|
||||
|
||||
// Filter indels
|
||||
val filteredIndels = new VariantFiltration with CommandLineGATKArgs with ExpandedIntervals
|
||||
filteredIndels.analysisName = base + "_filteredIndels"
|
||||
filteredIndels.jobOutputFile = new File(".queue/logs/IndelCalling/VariantFiltration.indels.out")
|
||||
filteredIndels.jobOutputFile = ".queue/logs/IndelCalling/VariantFiltration.indels.out"
|
||||
filteredIndels.filterName ++= List("IndelQUALFilter","IndelSBFilter","IndelQDFilter")
|
||||
filteredIndels.filterExpression ++= List("\"QUAL<30.0\"","\"SB>-1.0\"","\"QD<2.0\"")
|
||||
filteredIndels.variantVCF = indels.out
|
||||
filteredIndels.out = swapExt("IndelCalls", indels.out, ".vcf",".filtered.vcf")
|
||||
add(filteredIndels)
|
||||
|
||||
// Call snps
|
||||
val snps = new UnifiedGenotyper with CommandLineGATKArgs with ExpandedIntervals
|
||||
snps.analysisName = base+"_snps"
|
||||
snps.jobOutputFile = new File(".queue/logs/SNPCalling/UnifiedGenotyper.snps.out")
|
||||
snps.memoryLimit = 6
|
||||
snps.jobOutputFile = ".queue/logs/SNPCalling/UnifiedGenotyper.snps.out"
|
||||
snps.downsample_to_coverage = 600
|
||||
snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
|
||||
snps.input_file = bamFiles
|
||||
snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP
|
||||
snps.input_file = List(writeBamList.listFile)
|
||||
snps.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp)
|
||||
snps.out = new File("SnpCalls", base+".snps.vcf")
|
||||
|
||||
snps.scatterCount = qscript.num_var_scatter_jobs
|
||||
snps.setupScatterFunction = {
|
||||
case scatter: ScatterFunction =>
|
||||
scatter.commandDirectory = new File("SnpCalls/ScatterGather")
|
||||
scatter.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Scatter.out")
|
||||
}
|
||||
snps.setupCloneFunction = {
|
||||
case (clone: CloneFunction, index: Int) =>
|
||||
clone.commandDirectory = new File("SnpCalls/ScatterGather/Scatter_%s".format(index))
|
||||
clone.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Scatter_%s.out".format(index))
|
||||
}
|
||||
snps.setupGatherFunction = {
|
||||
case (gather: GatherFunction, source: ArgumentSource) =>
|
||||
gather.commandDirectory = new File("SnpCalls/ScatterGather/Gather_%s".format(source.field.getName))
|
||||
gather.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Gather_%s.out".format(source.field.getName))
|
||||
}
|
||||
snps.out = "SnpCalls/" + base+".snps.vcf"
|
||||
snps.scatterCount = qscript.variantCallerScatterCount
|
||||
snps.memoryLimit = qscript.variantCallerMemory
|
||||
add(snps)
|
||||
|
||||
// Filter snps
|
||||
val filteredSNPs = new VariantFiltration with CommandLineGATKArgs with ExpandedIntervals
|
||||
filteredSNPs.analysisName = base+"_filteredSNPs"
|
||||
filteredSNPs.jobOutputFile = new File(".queue/logs/SNPCalling/VariantFiltration.snps.out")
|
||||
filteredSNPs.jobOutputFile = ".queue/logs/SNPCalling/VariantFiltration.snps.out"
|
||||
filteredSNPs.filterName ++= List("SNPSBFilter","SNPQDFilter","SNPHRunFilter")
|
||||
filteredSNPs.filterExpression ++= List("\"SB>=0.10\"","\"QD<5.0\"","\"HRun>=4\"")
|
||||
filteredSNPs.clusterWindowSize = 10
|
||||
|
|
@ -246,92 +135,56 @@ class FullCallingPipeline extends QScript {
|
|||
filteredSNPs.rodBind :+= RodBind("mask", "VCF", filteredIndels.out)
|
||||
filteredSNPs.variantVCF = snps.out
|
||||
filteredSNPs.out = swapExt("SnpCalls",snps.out,".vcf",".filtered.vcf")
|
||||
add(filteredSNPs)
|
||||
|
||||
// Combine indels and snps into one VCF
|
||||
val combineAll = new CombineVariants with CommandLineGATKArgs with ExpandedIntervals
|
||||
combineAll.analysisName = base + "_combineAll"
|
||||
combineAll.jobOutputFile = new File(".queue/logs/Combined/CombineVariants.out")
|
||||
combineAll.variantMergeOptions = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION
|
||||
combineAll.jobOutputFile = ".queue/logs/Combined/CombineVariants.out"
|
||||
combineAll.variantmergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION
|
||||
combineAll.rod_priority_list = "Indels,SNPs"
|
||||
combineAll.rodBind :+= RodBind("Indels", "VCF", filteredIndels.out)
|
||||
combineAll.rodBind :+= RodBind("SNPs", "VCF", filteredSNPs.out)
|
||||
combineAll.out = new File("CombinedCalls", base + ".allVariants.filtered.vcf")
|
||||
combineAll.out = "CombinedCalls/" + base + ".snps_and_indels.filtered.vcf"
|
||||
add(combineAll)
|
||||
|
||||
// Annotate variants
|
||||
val annotated = new GenomicAnnotator with CommandLineGATKArgs with ExpandedIntervals
|
||||
annotated.analysisName = base+"_annotated"
|
||||
annotated.jobOutputFile = new File(".queue/logs/Combined/GenomicAnnotator.out")
|
||||
annotated.jobOutputFile = ".queue/logs/Combined/GenomicAnnotator.out"
|
||||
annotated.rodToIntervalTrackName = "variant"
|
||||
annotated.rodBind :+= RodBind("variant", "VCF", combineAll.out)
|
||||
annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.pipeline.getProject.getRefseqTable)
|
||||
annotated.out = new File(base + ".snps_and_indels.filtered.annotated.vcf")
|
||||
annotated.out = base + ".snps_and_indels.filtered.annotated.vcf"
|
||||
add(annotated)
|
||||
|
||||
// Variant eval the standard region
|
||||
val stdEval = new VariantEval with CommandLineGATKArgs
|
||||
stdEval.analysisName = base+"_VariantEval"
|
||||
stdEval.jobOutputFile = new File(".queue/logs/Overall/VariantEval.std.out")
|
||||
stdEval.noST = true
|
||||
stdEval.noEV = true
|
||||
stdEval.evalModule ++= List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants")
|
||||
stdEval.stratificationModule ++= List("EvalRod", "CompRod", "Novelty")
|
||||
stdEval.analysisName = base+"_StandardVariantEval"
|
||||
stdEval.jobOutputFile = ".queue/logs/Overall/VariantEval.std.out"
|
||||
stdEval.doNotUseAllStandardStratifications = true
|
||||
stdEval.doNotUseAllStandardModules = true
|
||||
stdEval.evalModule = List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants")
|
||||
stdEval.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "FunctionalClass", "Sample")
|
||||
stdEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp)
|
||||
stdEval.rodBind :+= RodBind("eval", "VCF", annotated.out)
|
||||
stdEval.out = swapExt(annotated.out, ".vcf", ".eval")
|
||||
add(stdEval)
|
||||
|
||||
// Variant eval the flanking region
|
||||
val flanksEval = new VariantEval with CommandLineGATKArgs
|
||||
flanksEval.analysisName = base+"_VariantEval"
|
||||
flanksEval.jobOutputFile = new File(".queue/logs/Overall/VariantEval.flanks.out")
|
||||
flanksEval.intervals = List(ei.outList)
|
||||
flanksEval.noST = true
|
||||
flanksEval.noEV = true
|
||||
flanksEval.evalModule ++= List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants")
|
||||
flanksEval.stratificationModule ++= List("EvalRod", "CompRod", "Novelty")
|
||||
flanksEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp)
|
||||
flanksEval.rodBind :+= RodBind("eval", "VCF", annotated.out)
|
||||
flanksEval.out = swapExt(annotated.out, ".vcf", ".flanks.eval")
|
||||
|
||||
// Make the bam list
|
||||
val listOfBams = new File("Resources", base +".BamFiles.list")
|
||||
|
||||
val writeBamList = new ListWriterFunction
|
||||
writeBamList.analysisName = base + "_BamList"
|
||||
writeBamList.jobOutputFile = new File(".queue/logs/Overall/WriteBamList.out")
|
||||
writeBamList.inputFiles = bamFiles
|
||||
writeBamList.listFile = listOfBams
|
||||
|
||||
add(indels, filteredIndels, snps, filteredSNPs, combineAll, annotated, stdEval, writeBamList)
|
||||
|
||||
if (qscript.expandIntervals > 0) {
|
||||
// Variant eval the flanking region
|
||||
val flanksEval = new VariantEval with CommandLineGATKArgs
|
||||
flanksEval.analysisName = base+"_FlanksVariantEval"
|
||||
flanksEval.jobOutputFile = ".queue/logs/Overall/VariantEval.flanks.out"
|
||||
flanksEval.intervals = List(ei.outList)
|
||||
flanksEval.doNotUseAllStandardStratifications = true
|
||||
flanksEval.doNotUseAllStandardModules = true
|
||||
flanksEval.evalModule = List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants")
|
||||
flanksEval.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "FunctionalClass", "Sample")
|
||||
flanksEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp)
|
||||
flanksEval.rodBind :+= RodBind("eval", "VCF", annotated.out)
|
||||
flanksEval.out = swapExt(annotated.out, ".vcf", ".flanks.eval")
|
||||
add(flanksEval)
|
||||
}
|
||||
|
||||
// Run the ADPR and make pretty stuff
|
||||
if (qscript.tearScript != null) {
|
||||
class rCommand extends CommandLineFunction{
|
||||
@Input(doc="R script")
|
||||
var script: File = _
|
||||
@Input(doc="pipeline yaml")
|
||||
var yaml: File = _
|
||||
@Input(doc="list of bams")
|
||||
var bamlist: File =_
|
||||
@Input(doc="Eval files root")
|
||||
var evalroot: File =_
|
||||
@Output(doc="tearsheet loc")
|
||||
var tearsheet: File =_
|
||||
def commandLine = "Rscript %s -yaml %s -bamlist %s -evalroot %s -tearout %s".format(script, yaml, bamlist, evalroot, tearsheet)
|
||||
}
|
||||
|
||||
val adpr = new rCommand
|
||||
adpr.analysisName = base + "_ADPR"
|
||||
adpr.bamlist = listOfBams
|
||||
adpr.yaml = qscript.yamlFile.getAbsoluteFile
|
||||
adpr.script = qscript.tearScript
|
||||
adpr.evalroot = stdEval.out
|
||||
adpr.jobOutputFile = new File(".queue/logs/Overall/ADPR.out")
|
||||
adpr.tearsheet = new File("VariantCalls", base + ".tearsheet.pdf")
|
||||
|
||||
add(adpr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -190,7 +190,7 @@ object PipelineTest extends BaseTest with Logging {
|
|||
println(" value (min,target,max) table key metric")
|
||||
for (validation <- evalSpec.validations) {
|
||||
val value = parser.getValue(validation.table, validation.key, validation.metric)
|
||||
val inRange = validation.inRange(value)
|
||||
val inRange = if (value == null) false else validation.inRange(value)
|
||||
val flag = if (!inRange) "*" else " "
|
||||
println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric))
|
||||
allInRange &= inRange
|
||||
|
|
|
|||
|
|
@ -37,12 +37,12 @@ class FullCallingPipelineTest {
|
|||
val k1gChr20Dataset = {
|
||||
val dataset = newK1gDataset("Barcoded_1000G_WEx_chr20", true)
|
||||
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.all", "nCalledLoci", 1391)
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.known", "nCalledLoci", 1142)
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.novel", "nCalledLoci", 249)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.all", "tiTvRatio", 3.6250)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.known", "tiTvRatio", 3.7190)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.novel", "tiTvRatio", 3.2037)
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.all.all", "nCalledLoci", 1391)
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.known.all", "nCalledLoci", 1142)
|
||||
dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.novel.all", "nCalledLoci", 249)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.all.all", "tiTvRatio", 3.6250)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.known.all", "tiTvRatio", 3.7190)
|
||||
dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.novel.all", "tiTvRatio", 3.2037)
|
||||
|
||||
dataset
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue