diff --git a/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/ONLY_GENOTYPE_xhmmCNVpipeline.scala b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/ONLY_GENOTYPE_xhmmCNVpipeline.scala new file mode 100644 index 000000000..83379787f --- /dev/null +++ b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/ONLY_GENOTYPE_xhmmCNVpipeline.scala @@ -0,0 +1,103 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.queue.qscripts.CNV + +import org.broadinstitute.gatk.queue.extensions.gatk._ +import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.queue.util.VCF_BAM_utilities +import org.broadinstitute.gatk.queue.extensions.gatk.DoC._ +import org.broadinstitute.gatk.utils.commandline._ +import java.io.{File, PrintStream, PrintWriter} +import org.broadinstitute.gatk.utils.text.XReadLines +import collection.JavaConversions._ +import org.broadinstitute.gatk.tools.walkers.coverage.CoverageUtils +import org.broadinstitute.gatk.queue.function.scattergather.{CloneFunction, ScatterFunction, GatherFunction, ScatterGatherableFunction} +import org.broadinstitute.gatk.queue.function.{CommandLineFunction, InProcessFunction} +import org.broadinstitute.gatk.utils.io.IOUtils + +class ONLY_GENOTYPE_xhmmCNVpipeline extends QScript { + qscript => + + @Input(doc = "bam input, as as a list of .bam files, or a list of bam files with sample IDs to be used ( as specified at https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_CommandLineGATK.html#--sample_rename_mapping_file )", shortName = "I", required = true) + var bams: File = _ + + @Input(doc = "xhmm executable file", shortName = "xhmmExec", required = true) + var xhmmExec: File = _ + + @Input(shortName = "R", doc = "ref", required = true) + var referenceFile: File = _ + + @Argument(doc = "Samples to run together for DoC, CNV discovery, and CNV genotyping. By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false) + var samplesPerJob = 1 + + @Output(doc = "Base name for files to output", shortName = "o", required = true) + var outputBase: File = _ + + @Argument(shortName = "xhmmParams", doc = "xhmm model parameters file", required = true) + var xhmmParamsArg: File = _ + + @Argument(shortName = "genotypeParams", doc = "xhmm command-line parameters for genotyping step", required = false) + var genotypeCommandLineParams: String = "" + + @Argument(shortName = "addGenotypeRegions", doc = "Additional interval list files to be genotyped", required = false) + var addGenotypeRegions: List[File] = List[File]() + + @Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false) + var longJobQueue: String = "" + + @Argument(shortName = "filteredZscored", doc = "File of PCA-normalized read depths, after filtering and Z-score calculation", required = true) + var filteredZscored: File = _ + + @Argument(shortName = "originalSameFiltered", doc = "File of original read depths, using same filters (samples and targets) as Z-score matrix [filteredZscored argument]", required = true) + var originalSameFiltered: File = _ + + + trait LongRunTime extends CommandLineFunction { + if (longJobQueue != "") + this.jobQueue = longJobQueue + } + + def script = { + val parseMixedInputBamList = parseBamListWithOptionalSampleMappings(bams) + + val processMixedInputBamList = new ProcessBamListWithOptionalSampleMappings(parseMixedInputBamList, outputBase.getPath) + add(processMixedInputBamList) + + val samples: List[String] = parseMixedInputBamList.sampleToBams.keys.toList + Console.out.printf("Samples are %s%n", samples) + + val groups: List[Group] = buildDoCgroups(samples, parseMixedInputBamList.sampleToBams, samplesPerJob, outputBase) + for (group <- groups) { + Console.out.printf("Group is %s%n", group) + } + + for (regionsFile <- addGenotypeRegions) { + val genotypeRegions = new GenotypeCNVs(filteredZscored, regionsFile, originalSameFiltered, new File(outputBase.getParent + "/" + regionsFile.getName.replace(".interval_list", "") + "." + outputBase.getName), xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime + add(genotypeRegions) + } + } + +} diff --git a/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/xhmmCNVpipeline.scala index 7d2566865..d031f5d4d 100644 --- a/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/xhmmCNVpipeline.scala +++ b/public/gatk-queue-extensions-public/src/main/qscripts/org/broadinstitute/gatk/queue/qscripts/CNV/xhmmCNVpipeline.scala @@ -134,6 +134,9 @@ class xhmmCNVpipeline extends QScript { @Argument(shortName = "subsegmentGenotypeThreshold", doc = "If genotypeSubsegments, this is the default genotype quality threshold for the sub-segments", required = false) var subsegmentGenotypeThreshold = 20.0 + @Argument(shortName = "addGenotypeRegions", doc = "Additional interval list files to be genotyped", required = false) + var addGenotypeRegions: List[File] = List[File]() + @Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false) var longJobQueue: String = "" @@ -314,41 +317,7 @@ class xhmmCNVpipeline extends QScript { val discover = new DiscoverCNVs(filterZscore.filteredZscored, filterOriginal.sameFiltered) add(discover) - - abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, outName: String) extends SamplesScatterable(xhmmExec, groups) with LongRunTime { - @Input(doc = "") - val input = inputParam - - @Input(doc = "") - val xhmmParams = xhmmParamsArg - - @Input(doc = "") - val origRD = origRDParam - - @Input(doc = "") - val inXcnv = xcnv - - @Output - @Gather(classOf[MergeVCFsGatherFunction]) - val vcf: File = new File(outName) - - override def commandLine = - xhmmExec + " --genotype" + - " -p " + xhmmParams + - " -r " + input + - " -g " + inXcnv + - " -F " + referenceFile + - " -R " + origRD + - " -v " + vcf + - " " + genotypeCommandLineParams + - " " + addCommand - } - - class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".vcf") { - override def description = "Genotypes discovered CNVs in all samples: " + commandLine - } - - class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".subsegments.vcf") { + class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".subsegments.vcf", xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) { override def commandLine = super.commandLine + " --subsegments" + @@ -358,13 +327,19 @@ class xhmmCNVpipeline extends QScript { override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + commandLine } - val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered) + val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered, outputBase, xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime add(genotype) if (genotypeSubsegments) { - val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered) + val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered, xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime add(genotypeSegs) } + + addGenotypeRegions :+= prepTargets.out + for (regionsFile <- addGenotypeRegions) { + val genotypeRegions = new GenotypeCNVs(filterZscore.filteredZscored, regionsFile, filterOriginal.sameFiltered, new File(outputBase.getParent + "/" + regionsFile.getName.replace(".interval_list", "") + "." + outputBase.getName), xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime + add(genotypeRegions) + } } class ExcludeTargetsBasedOnValue(locus_valueIn : File, outSuffix : String, minVal : Double, maxVal : Double) extends InProcessFunction { @@ -531,85 +506,3 @@ class xhmmCNVpipeline extends QScript { override def description = "Filters original read-depth data to be the same as filtered, normalized data: " + command } } - - -abstract class SamplesScatterable(val xhmmExec: File, val groups: List[Group]) extends ScatterGatherableFunction with CommandLineFunction { - this.scatterCount = groups.size - this.scatterClass = classOf[SamplesScatterFunction] - - @Input(doc = "", required=false) - var keepSampleIDs: Option[String] = None - - def addCommand = if (keepSampleIDs.isDefined) ("--keepSampleIDs " + keepSampleIDs.get) else "" -} - -class SamplesScatterFunction extends ScatterFunction with InProcessFunction { - protected var groups: List[Group] = _ - override def scatterCount = groups.size - - @Output(doc="Scatter function outputs") - var scatterSamples: Seq[File] = Nil - - override def init() { - this.groups = this.originalFunction.asInstanceOf[SamplesScatterable].groups - } - - override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) { - val scatterPart = IOUtils.absolute(cloneFunction.commandDirectory, "keepSampleIDs.txt") - cloneFunction.setFieldValue("keepSampleIDs", Some(scatterPart)) - this.scatterSamples :+= scatterPart - } - - override def run() { - if (groups.size != this.scatterSamples.size) - throw new Exception("Internal inconsistency error in scattering jobs") - - (groups, this.scatterSamples).zipped foreach { - (group, sampsFile) => { - val sampsWriter = new PrintWriter(new PrintStream(sampsFile)) - - for (samp <- group.samples) { - try { - sampsWriter.printf("%s%n", samp) - } - catch { - case e: Exception => throw e - } - } - sampsWriter.close - } - } - } -} - -trait MergeVCFs extends CommandLineFunction { - var xhmmExec: File = _ - - @Input(doc = "") - var inputVCFs: List[File] = Nil - - @Output - var mergedVCF: File = null - - override def commandLine = - xhmmExec + " --mergeVCFs" + - inputVCFs.map(input => " --mergeVCF " + input).reduceLeft(_ + "" + _) + - " -v " + mergedVCF - - override def description = "Combines VCF outputs for multiple samples (at same loci): " + commandLine -} - -class MergeVCFsGatherFunction extends MergeVCFs with GatherFunction { - override def freezeFieldValues() { - super.freezeFieldValues() - - this.xhmmExec = originalFunction.asInstanceOf[SamplesScatterable].xhmmExec - - this.inputVCFs = this.gatherParts.toList - this.mergedVCF = this.originalOutput - } -} - -class DummyGatherFunction extends InProcessFunction with GatherFunction { - override def run() {} -} diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/DoC/package.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/DoC/package.scala index 0bedbf543..fc999d04a 100644 --- a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/DoC/package.scala +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/DoC/package.scala @@ -40,6 +40,7 @@ import org.broadinstitute.gatk.utils.downsampling.DownsampleType // due to ongoing bugs with inner classes/objects in package objects: // https://issues.scala-lang.org/browse/SI-4344 // https://issues.scala-lang.org/browse/SI-5954 + class DoC(val bams: List[File], val DoC_output: File, val countType: CoverageUtils.CountPileupType, val MAX_DEPTH: Int, val minMappingQuality: Int, val minBaseQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int], val sampleRenameMappingFile: Option[File] = None) extends CommandLineGATK with ScatterGatherableFunction { val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary" diff --git a/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/XHMM/package.scala b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/XHMM/package.scala new file mode 100644 index 000000000..36fcdc74d --- /dev/null +++ b/public/gatk-queue-extensions-public/src/main/scala/org/broadinstitute/gatk/queue/extensions/gatk/XHMM/package.scala @@ -0,0 +1,156 @@ +/* +* Copyright (c) 2012 The Broad Institute +* +* Permission is hereby granted, free of charge, to any person +* obtaining a copy of this software and associated documentation +* files (the "Software"), to deal in the Software without +* restriction, including without limitation the rights to use, +* copy, modify, merge, publish, distribute, sublicense, and/or sell +* copies of the Software, and to permit persons to whom the +* Software is furnished to do so, subject to the following +* conditions: +* +* The above copyright notice and this permission notice shall be +* included in all copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +* THE USE OR OTHER DEALINGS IN THE SOFTWARE. +*/ + +package org.broadinstitute.gatk.queue.extensions.gatk + +import org.broadinstitute.gatk.queue.extensions.gatk._ +import org.broadinstitute.gatk.queue.QScript +import org.broadinstitute.gatk.queue.extensions.gatk.DoC._ +import org.broadinstitute.gatk.utils.commandline._ +import java.io.{File, PrintStream, PrintWriter} +import collection.JavaConversions._ +import org.broadinstitute.gatk.queue.function.scattergather.{CloneFunction, ScatterFunction, GatherFunction, ScatterGatherableFunction} +import org.broadinstitute.gatk.queue.function.{CommandLineFunction, InProcessFunction} +import org.broadinstitute.gatk.utils.io.IOUtils + +// Minimal refactor from a package object to a file full of classes/objects +// due to ongoing bugs with inner classes/objects in package objects: +// https://issues.scala-lang.org/browse/SI-4344 +// https://issues.scala-lang.org/browse/SI-5954 + + abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, outName: String, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends SamplesScatterable(xhmmExec, groups) { + @Input(doc = "") + val input = inputParam + + @Input(doc = "") + val xhmmParams = xhmmParamsArg + + @Input(doc = "") + val origRD = origRDParam + + @Input(doc = "") + val inXcnv = xcnv + + @Output + @Gather(classOf[MergeVCFsGatherFunction]) + val vcf: File = new File(outName) + + override def commandLine = + xhmmExec + " --genotype" + + " -p " + xhmmParams + + " -r " + input + + " -g " + inXcnv + + " -F " + referenceFile + + " -R " + origRD + + " -v " + vcf + + " " + genotypeCommandLineParams + + " " + addCommand + } + + class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, genotypeOutputBase: File, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, genotypeOutputBase.getPath + ".vcf", xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) { + override def description = "Genotypes CNV regions in all samples: " + commandLine + } + + +abstract class SamplesScatterable(val xhmmExec: File, val groups: List[Group]) extends ScatterGatherableFunction with CommandLineFunction { + this.scatterCount = groups.size + this.scatterClass = classOf[SamplesScatterFunction] + + @Input(doc = "", required=false) + var keepSampleIDs: Option[String] = None + + def addCommand = if (keepSampleIDs.isDefined) ("--keepSampleIDs " + keepSampleIDs.get) else "" +} + +class SamplesScatterFunction extends ScatterFunction with InProcessFunction { + protected var groups: List[Group] = _ + override def scatterCount = groups.size + + @Output(doc="Scatter function outputs") + var scatterSamples: Seq[File] = Nil + + override def init() { + this.groups = this.originalFunction.asInstanceOf[SamplesScatterable].groups + } + + override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) { + val scatterPart = IOUtils.absolute(cloneFunction.commandDirectory, "keepSampleIDs.txt") + cloneFunction.setFieldValue("keepSampleIDs", Some(scatterPart)) + this.scatterSamples :+= scatterPart + } + + override def run() { + if (groups.size != this.scatterSamples.size) + throw new Exception("Internal inconsistency error in scattering jobs") + + (groups, this.scatterSamples).zipped foreach { + (group, sampsFile) => { + val sampsWriter = new PrintWriter(new PrintStream(sampsFile)) + + for (samp <- group.samples) { + try { + sampsWriter.printf("%s%n", samp) + } + catch { + case e: Exception => throw e + } + } + sampsWriter.close + } + } + } +} + +trait MergeVCFs extends CommandLineFunction { + var xhmmExec: File = _ + + @Input(doc = "") + var inputVCFs: List[File] = Nil + + @Output + var mergedVCF: File = null + + override def commandLine = + xhmmExec + " --mergeVCFs" + + inputVCFs.map(input => " --mergeVCF " + input).reduceLeft(_ + "" + _) + + " -v " + mergedVCF + + override def description = "Combines VCF outputs for multiple samples (at same loci): " + commandLine +} + +class MergeVCFsGatherFunction extends MergeVCFs with GatherFunction { + override def freezeFieldValues() { + super.freezeFieldValues() + + this.xhmmExec = originalFunction.asInstanceOf[SamplesScatterable].xhmmExec + + this.inputVCFs = this.gatherParts.toList + this.mergedVCF = this.originalOutput + } +} + +class DummyGatherFunction extends InProcessFunction with GatherFunction { + override def run() {} +}