From 6b1b4931e7f4efb90b80b38ca99956a83be3ea1c Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 27 Apr 2011 02:29:56 +0000 Subject: [PATCH] Added FCP VE stratifications for Filter, FunctionalClass, and Stratification as requested by Corin. Feeding FCP UG the bam list instead of individual bams to cut scatter gather time from O(m^100) as measured by Chris to O(m^1). Fixed NPE when eval values aren't found in PipelineTests. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5694 348d0f76-0448-11de-a6fe-93d51630548a --- .../qscript/playground/FullCallingPipeline.q | 325 +++++------------- .../sting/queue/pipeline/PipelineTest.scala | 2 +- .../playground/FullCallingPipelineTest.scala | 12 +- 3 files changed, 96 insertions(+), 243 deletions(-) diff --git a/scala/qscript/playground/FullCallingPipeline.q b/scala/qscript/playground/FullCallingPipeline.q index 319fd9766..3f0e460fe 100755 --- a/scala/qscript/playground/FullCallingPipeline.q +++ b/scala/qscript/playground/FullCallingPipeline.q @@ -1,10 +1,30 @@ -import org.broadinstitute.sting.commandline.ArgumentSource +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + import org.broadinstitute.sting.datasources.pipeline.Pipeline import org.broadinstitute.sting.queue.extensions.gatk._ -import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction -import org.broadinstitute.sting.queue.extensions.samtools._ import org.broadinstitute.sting.queue.function.ListWriterFunction -import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, CloneFunction, ScatterFunction} import org.broadinstitute.sting.queue.library.ipf.intervals.ExpandIntervals import org.broadinstitute.sting.queue.QScript import collection.JavaConversions._ @@ -16,148 +36,47 @@ class FullCallingPipeline extends QScript { @Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y") var yamlFile: File = _ - @Input(doc="path to GATK jar", shortName="G", required=false) - var gatkJar: File = _ + @Input(doc="level of parallelism for UnifiedGenotyper (both for SNPs and indels). By default is set to 20.", shortName="varScatter", required=false) + var variantCallerScatterCount = 20 - @Input(doc="level of parallelism for IndelRealigner. By default is set to 1.", shortName="cleanerScatter", required=false) - var num_cleaner_scatter_jobs = 1 - - @Input(doc="level of parallelism for UnifiedGenotyper (both for SNPs and indels). By default is set to 20.", shortName="varScatter", required=false) - var num_var_scatter_jobs = 20 + @Argument(doc="memory limit for UnifiedGenotyper (both for SNPs and indels). By default is set to 4g.", shortName="varMemory", required=false) + var variantCallerMemory = 4 @Argument(doc="expand each target in input intervals by the specified number of bases (50 bases by default)", shortName="expand", required=false) var expandIntervals = 50 - @Input(doc="Skip indel-cleaning for BAM files (for testing only)", shortName="skipCleaning", required=false) - var skip_cleaning = false - - @Input(doc="ADPR script", shortName ="tearScript", required=false) - var tearScript: File = _ - private var pipeline: Pipeline = _ - private final val picardFixMatesClass = "net.sf.picard.sam.FixMateInformation" - trait CommandLineGATKArgs extends CommandLineGATK { - this.intervals = List(qscript.pipeline.getProject.getIntervalList) - this.jarFile = qscript.gatkJar this.reference_sequence = qscript.pipeline.getProject.getReferenceFile + this.intervals = List(qscript.pipeline.getProject.getIntervalList) this.memoryLimit = 4 } - - // ------------ SETUP THE PIPELINE ----------- // - - def script() { pipeline = PicardPipeline.parse(qscript.yamlFile) val projectBase: String = qscript.pipeline.getProject.getName + val base = projectBase + ".cleaned" + val bamType = "cleaned" - if (qscript.skip_cleaning) { - endToEnd(projectBase + ".uncleaned", "recalibrated") - } else { - val recalibratedSamples = qscript.pipeline.getSamples.filter(_.getBamFiles.contains("recalibrated")) + // Make the bam list + val writeBamList = new ListWriterFunction + writeBamList.analysisName = base + "_BamList" + writeBamList.jobOutputFile = ".queue/logs/Overall/WriteBamList.out" + writeBamList.inputFiles = qscript.pipeline.getSamples.filter(_.getBamFiles.contains(bamType)).map(_.getBamFiles.get(bamType)).toList + writeBamList.listFile = "Resources/" + base +".bamfiles.list" + add(writeBamList) - for ( sample <- recalibratedSamples ) { - val sampleId = sample.getId - // put unclean bams in unclean genotypers in advance, create the extension files - val bam = sample.getBamFiles.get("recalibrated") - if (!sample.getBamFiles.contains("cleaned")) { - sample.getBamFiles.put("cleaned", swapExt("CleanedBams", bam,"bam","cleaned.bam")) - } - - val cleaned_bam = sample.getBamFiles.get("cleaned") - val indel_targets = swapExt("CleanedBams/IntermediateFiles/"+sampleId, bam,"bam","realigner_targets.interval_list") - - // create the cleaning commands - val targetCreator = new RealignerTargetCreator with CommandLineGATKArgs - targetCreator.jobOutputFile = new File(".queue/logs/Cleaning/%s/RealignerTargetCreator.out".format(sampleId)) - targetCreator.jobName = "CreateTargets_"+sampleId - targetCreator.analysisName = "CreateTargets_"+sampleId - targetCreator.input_file :+= bam - targetCreator.out = indel_targets - targetCreator.memoryLimit = 2 - targetCreator.isIntermediate = true - - val realigner = new IndelRealigner with CommandLineGATKArgs - realigner.jobOutputFile = new File(".queue/logs/Cleaning/%s/IndelRealigner.out".format(sampleId)) - realigner.analysisName = "RealignBam_"+sampleId - realigner.input_file = targetCreator.input_file - realigner.targetIntervals = targetCreator.out - realigner.intervals = Nil - realigner.intervalsString = Nil - realigner.scatterCount = num_cleaner_scatter_jobs - realigner.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp) - realigner.rodBind :+= RodBind("indels", "VCF", swapExt(realigner.reference_sequence.getParentFile, realigner.reference_sequence, "fasta", "1kg_pilot_indels.vcf")) - - // if scatter count is > 1, do standard scatter gather, if not, explicitly set up fix mates - if (realigner.scatterCount > 1) { - realigner.out = cleaned_bam - realigner.setupScatterFunction = { - case scatter: ScatterFunction => - scatter.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather".format(sampleId)) - scatter.jobOutputFile = new File(".queue/logs/Cleaning/%s/Scatter.out".format(sampleId)) - } - realigner.setupCloneFunction = { - case (clone: CloneFunction, index: Int) => - clone.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Scatter_%s".format(sampleId, index)) - clone.jobOutputFile = new File(".queue/logs/Cleaning/%s/Scatter_%s.out".format(sampleId, index)) - } - realigner.setupGatherFunction = { - case (gather: BamGatherFunction, source: ArgumentSource) => - gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName)) - gather.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId)) - gather.memoryLimit = 6 - gather.javaMainClass = picardFixMatesClass - gather.assumeSorted = None - case (gather: GatherFunction, source: ArgumentSource) => - gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName)) - gather.jobOutputFile = new File(".queue/logs/Cleaning/%s/Gather_%s.out".format(sampleId, source.field.getName)) - } - - add(targetCreator,realigner) - } else { - realigner.out = swapExt("CleanedBams/IntermediateFiles/"+sampleId,bam,"bam","unfixed.cleaned.bam") - realigner.isIntermediate = true - - // Explicitly run fix mates if the function won't be scattered. - val fixMates = new PicardBamFunction { - @Input(doc="unfixed bam") var unfixed: File = _ - @Output(doc="fixed bam") var fixed: File = _ - def inputBams = List(unfixed) - def outputBam = fixed - } - - fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId)) - fixMates.memoryLimit = 6 - fixMates.javaMainClass = picardFixMatesClass - fixMates.unfixed = realigner.out - fixMates.fixed = cleaned_bam - fixMates.analysisName = "FixMates_"+sampleId - - // Add the fix mates explicitly - add(targetCreator,realigner,fixMates) - } - - var samtoolsindex = new SamtoolsIndexFunction - samtoolsindex.jobOutputFile = new File(".queue/logs/Cleaning/%s/SamtoolsIndex.out".format(sampleId)) - samtoolsindex.bamFile = cleaned_bam - samtoolsindex.analysisName = "index_cleaned_"+sampleId - //samtoolsindex.jobQueue = qscript.short_job_queue - add(samtoolsindex) - } - - endToEnd(projectBase + ".cleaned", "cleaned") - } - } - - def endToEnd(base: String, bamType: String) = { - val samples = qscript.pipeline.getSamples.filter(_.getBamFiles.contains(bamType)).toList - val bamFiles = samples.map(_.getBamFiles.get(bamType)) - - val ei : ExpandIntervals = new ExpandIntervals(qscript.pipeline.getProject.getIntervalList, 1, qscript.expandIntervals, new File("Resources", base + ".flanks.interval_list"), qscript.pipeline.getProject.getReferenceFile, "INTERVALS", "INTERVALS") - ei.jobOutputFile = new File(".queue/logs/Overall/ExpandIntervals.out") + val ei = new ExpandIntervals( + qscript.pipeline.getProject.getIntervalList, + 1, + qscript.expandIntervals, + "Resources/" + base + ".flanks.interval_list", + qscript.pipeline.getProject.getReferenceFile, + "INTERVALS", + "INTERVALS") + ei.jobOutputFile = ".queue/logs/Overall/ExpandIntervals.out" if (qscript.expandIntervals > 0) { add(ei) @@ -172,73 +91,43 @@ class FullCallingPipeline extends QScript { // Call indels val indels = new UnifiedGenotyper with CommandLineGATKArgs with ExpandedIntervals indels.analysisName = base + "_indels" - indels.jobOutputFile = new File(".queue/logs/IndelCalling/UnifiedGenotyper.indels.out") - indels.memoryLimit = 6 + indels.jobOutputFile = ".queue/logs/IndelCalling/UnifiedGenotyper.indels.out" indels.downsample_to_coverage = 600 indels.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.INDEL - indels.input_file = bamFiles + indels.input_file = List(writeBamList.listFile) indels.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp) - indels.out = new File("IndelCalls", base+".indels.vcf") - - indels.scatterCount = qscript.num_var_scatter_jobs - indels.setupScatterFunction = { - case scatter: ScatterFunction => - scatter.commandDirectory = new File("IndelCalls/ScatterGather") - scatter.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Scatter.out") - } - indels.setupCloneFunction = { - case (clone: CloneFunction, index: Int) => - clone.commandDirectory = new File("IndelCalls/ScatterGather/Scatter_%s".format(index)) - clone.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Scatter_%s.out".format(index)) - } - indels.setupGatherFunction = { - case (gather: GatherFunction, source: ArgumentSource) => - gather.commandDirectory = new File("IndelCalls/ScatterGather/Gather_%s".format(source.field.getName)) - gather.jobOutputFile = new File(".queue/logs/IndelCalling/ScatterGather/Gather_%s.out".format(source.field.getName)) - } + indels.out = "IndelCalls/" + base+".indels.vcf" + indels.scatterCount = qscript.variantCallerScatterCount + indels.memoryLimit = qscript.variantCallerMemory + add(indels) // Filter indels val filteredIndels = new VariantFiltration with CommandLineGATKArgs with ExpandedIntervals filteredIndels.analysisName = base + "_filteredIndels" - filteredIndels.jobOutputFile = new File(".queue/logs/IndelCalling/VariantFiltration.indels.out") + filteredIndels.jobOutputFile = ".queue/logs/IndelCalling/VariantFiltration.indels.out" filteredIndels.filterName ++= List("IndelQUALFilter","IndelSBFilter","IndelQDFilter") filteredIndels.filterExpression ++= List("\"QUAL<30.0\"","\"SB>-1.0\"","\"QD<2.0\"") filteredIndels.variantVCF = indels.out filteredIndels.out = swapExt("IndelCalls", indels.out, ".vcf",".filtered.vcf") + add(filteredIndels) // Call snps val snps = new UnifiedGenotyper with CommandLineGATKArgs with ExpandedIntervals snps.analysisName = base+"_snps" - snps.jobOutputFile = new File(".queue/logs/SNPCalling/UnifiedGenotyper.snps.out") - snps.memoryLimit = 6 + snps.jobOutputFile = ".queue/logs/SNPCalling/UnifiedGenotyper.snps.out" snps.downsample_to_coverage = 600 snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP - snps.input_file = bamFiles - snps.genotype_likelihoods_model = org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel.Model.SNP + snps.input_file = List(writeBamList.listFile) snps.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getGenotypeDbsnpType, qscript.pipeline.getProject.getGenotypeDbsnp) - snps.out = new File("SnpCalls", base+".snps.vcf") - - snps.scatterCount = qscript.num_var_scatter_jobs - snps.setupScatterFunction = { - case scatter: ScatterFunction => - scatter.commandDirectory = new File("SnpCalls/ScatterGather") - scatter.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Scatter.out") - } - snps.setupCloneFunction = { - case (clone: CloneFunction, index: Int) => - clone.commandDirectory = new File("SnpCalls/ScatterGather/Scatter_%s".format(index)) - clone.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Scatter_%s.out".format(index)) - } - snps.setupGatherFunction = { - case (gather: GatherFunction, source: ArgumentSource) => - gather.commandDirectory = new File("SnpCalls/ScatterGather/Gather_%s".format(source.field.getName)) - gather.jobOutputFile = new File(".queue/logs/SNPCalling/ScatterGather/Gather_%s.out".format(source.field.getName)) - } + snps.out = "SnpCalls/" + base+".snps.vcf" + snps.scatterCount = qscript.variantCallerScatterCount + snps.memoryLimit = qscript.variantCallerMemory + add(snps) // Filter snps val filteredSNPs = new VariantFiltration with CommandLineGATKArgs with ExpandedIntervals filteredSNPs.analysisName = base+"_filteredSNPs" - filteredSNPs.jobOutputFile = new File(".queue/logs/SNPCalling/VariantFiltration.snps.out") + filteredSNPs.jobOutputFile = ".queue/logs/SNPCalling/VariantFiltration.snps.out" filteredSNPs.filterName ++= List("SNPSBFilter","SNPQDFilter","SNPHRunFilter") filteredSNPs.filterExpression ++= List("\"SB>=0.10\"","\"QD<5.0\"","\"HRun>=4\"") filteredSNPs.clusterWindowSize = 10 @@ -246,92 +135,56 @@ class FullCallingPipeline extends QScript { filteredSNPs.rodBind :+= RodBind("mask", "VCF", filteredIndels.out) filteredSNPs.variantVCF = snps.out filteredSNPs.out = swapExt("SnpCalls",snps.out,".vcf",".filtered.vcf") + add(filteredSNPs) // Combine indels and snps into one VCF val combineAll = new CombineVariants with CommandLineGATKArgs with ExpandedIntervals combineAll.analysisName = base + "_combineAll" - combineAll.jobOutputFile = new File(".queue/logs/Combined/CombineVariants.out") - combineAll.variantMergeOptions = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION + combineAll.jobOutputFile = ".queue/logs/Combined/CombineVariants.out" + combineAll.variantmergeoption = org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType.UNION combineAll.rod_priority_list = "Indels,SNPs" combineAll.rodBind :+= RodBind("Indels", "VCF", filteredIndels.out) combineAll.rodBind :+= RodBind("SNPs", "VCF", filteredSNPs.out) - combineAll.out = new File("CombinedCalls", base + ".allVariants.filtered.vcf") + combineAll.out = "CombinedCalls/" + base + ".snps_and_indels.filtered.vcf" + add(combineAll) // Annotate variants val annotated = new GenomicAnnotator with CommandLineGATKArgs with ExpandedIntervals annotated.analysisName = base+"_annotated" - annotated.jobOutputFile = new File(".queue/logs/Combined/GenomicAnnotator.out") + annotated.jobOutputFile = ".queue/logs/Combined/GenomicAnnotator.out" annotated.rodToIntervalTrackName = "variant" annotated.rodBind :+= RodBind("variant", "VCF", combineAll.out) annotated.rodBind :+= RodBind("refseq", "AnnotatorInputTable", qscript.pipeline.getProject.getRefseqTable) - annotated.out = new File(base + ".snps_and_indels.filtered.annotated.vcf") + annotated.out = base + ".snps_and_indels.filtered.annotated.vcf" + add(annotated) // Variant eval the standard region val stdEval = new VariantEval with CommandLineGATKArgs - stdEval.analysisName = base+"_VariantEval" - stdEval.jobOutputFile = new File(".queue/logs/Overall/VariantEval.std.out") - stdEval.noST = true - stdEval.noEV = true - stdEval.evalModule ++= List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants") - stdEval.stratificationModule ++= List("EvalRod", "CompRod", "Novelty") + stdEval.analysisName = base+"_StandardVariantEval" + stdEval.jobOutputFile = ".queue/logs/Overall/VariantEval.std.out" + stdEval.doNotUseAllStandardStratifications = true + stdEval.doNotUseAllStandardModules = true + stdEval.evalModule = List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants") + stdEval.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "FunctionalClass", "Sample") stdEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp) stdEval.rodBind :+= RodBind("eval", "VCF", annotated.out) stdEval.out = swapExt(annotated.out, ".vcf", ".eval") + add(stdEval) - // Variant eval the flanking region - val flanksEval = new VariantEval with CommandLineGATKArgs - flanksEval.analysisName = base+"_VariantEval" - flanksEval.jobOutputFile = new File(".queue/logs/Overall/VariantEval.flanks.out") - flanksEval.intervals = List(ei.outList) - flanksEval.noST = true - flanksEval.noEV = true - flanksEval.evalModule ++= List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants") - flanksEval.stratificationModule ++= List("EvalRod", "CompRod", "Novelty") - flanksEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp) - flanksEval.rodBind :+= RodBind("eval", "VCF", annotated.out) - flanksEval.out = swapExt(annotated.out, ".vcf", ".flanks.eval") - - // Make the bam list - val listOfBams = new File("Resources", base +".BamFiles.list") - - val writeBamList = new ListWriterFunction - writeBamList.analysisName = base + "_BamList" - writeBamList.jobOutputFile = new File(".queue/logs/Overall/WriteBamList.out") - writeBamList.inputFiles = bamFiles - writeBamList.listFile = listOfBams - - add(indels, filteredIndels, snps, filteredSNPs, combineAll, annotated, stdEval, writeBamList) - if (qscript.expandIntervals > 0) { + // Variant eval the flanking region + val flanksEval = new VariantEval with CommandLineGATKArgs + flanksEval.analysisName = base+"_FlanksVariantEval" + flanksEval.jobOutputFile = ".queue/logs/Overall/VariantEval.flanks.out" + flanksEval.intervals = List(ei.outList) + flanksEval.doNotUseAllStandardStratifications = true + flanksEval.doNotUseAllStandardModules = true + flanksEval.evalModule = List("SimpleMetricsByAC", "TiTvVariantEvaluator", "CountVariants") + flanksEval.stratificationModule = List("EvalRod", "CompRod", "Novelty", "Filter", "FunctionalClass", "Sample") + flanksEval.rodBind :+= RodBind("dbsnp", qscript.pipeline.getProject.getEvalDbsnpType, qscript.pipeline.getProject.getEvalDbsnp) + flanksEval.rodBind :+= RodBind("eval", "VCF", annotated.out) + flanksEval.out = swapExt(annotated.out, ".vcf", ".flanks.eval") add(flanksEval) } - - // Run the ADPR and make pretty stuff - if (qscript.tearScript != null) { - class rCommand extends CommandLineFunction{ - @Input(doc="R script") - var script: File = _ - @Input(doc="pipeline yaml") - var yaml: File = _ - @Input(doc="list of bams") - var bamlist: File =_ - @Input(doc="Eval files root") - var evalroot: File =_ - @Output(doc="tearsheet loc") - var tearsheet: File =_ - def commandLine = "Rscript %s -yaml %s -bamlist %s -evalroot %s -tearout %s".format(script, yaml, bamlist, evalroot, tearsheet) - } - - val adpr = new rCommand - adpr.analysisName = base + "_ADPR" - adpr.bamlist = listOfBams - adpr.yaml = qscript.yamlFile.getAbsoluteFile - adpr.script = qscript.tearScript - adpr.evalroot = stdEval.out - adpr.jobOutputFile = new File(".queue/logs/Overall/ADPR.out") - adpr.tearsheet = new File("VariantCalls", base + ".tearsheet.pdf") - - add(adpr) - } } } diff --git a/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala b/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala index 3a8df2ff4..96f6f3482 100644 --- a/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/pipeline/PipelineTest.scala @@ -190,7 +190,7 @@ object PipelineTest extends BaseTest with Logging { println(" value (min,target,max) table key metric") for (validation <- evalSpec.validations) { val value = parser.getValue(validation.table, validation.key, validation.metric) - val inRange = validation.inRange(value) + val inRange = if (value == null) false else validation.inRange(value) val flag = if (!inRange) "*" else " " println(" %s %s (%s,%s,%s) %s %s %s".format(flag, value, validation.min, validation.target, validation.max, validation.table, validation.key, validation.metric)) allInRange &= inRange diff --git a/scala/test/org/broadinstitute/sting/queue/pipeline/playground/FullCallingPipelineTest.scala b/scala/test/org/broadinstitute/sting/queue/pipeline/playground/FullCallingPipelineTest.scala index ed7558472..32d476962 100644 --- a/scala/test/org/broadinstitute/sting/queue/pipeline/playground/FullCallingPipelineTest.scala +++ b/scala/test/org/broadinstitute/sting/queue/pipeline/playground/FullCallingPipelineTest.scala @@ -37,12 +37,12 @@ class FullCallingPipelineTest { val k1gChr20Dataset = { val dataset = newK1gDataset("Barcoded_1000G_WEx_chr20", true) - dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.all", "nCalledLoci", 1391) - dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.known", "nCalledLoci", 1142) - dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.novel", "nCalledLoci", 249) - dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.all", "tiTvRatio", 3.6250) - dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.known", "tiTvRatio", 3.7190) - dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.novel", "tiTvRatio", 3.2037) + dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.all.all", "nCalledLoci", 1391) + dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.known.all", "nCalledLoci", 1142) + dataset.validations :+= new IntegerValidation("CountVariants", "dbsnp.eval.called.all.novel.all", "nCalledLoci", 249) + dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.all.all", "tiTvRatio", 3.6250) + dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.known.all", "tiTvRatio", 3.7190) + dataset.validations :+= new DoubleValidation("TiTvVariantEvaluator", "dbsnp.eval.called.all.novel.all", "tiTvRatio", 3.2037) dataset }