Add option to genotype additional user-defined interval lists
Add Qscript 'ONLY_GENOTYPE_xhmmCNVpipeline' to genotype additional user-defined interval lists Add Qscript 'ONLY_GENOTYPE_xhmmCNVpipeline' to genotype additional user-defined interval lists (and similar option to Qscript 'xhmmCNVpipeline')
This commit is contained in:
parent
a4a9e73ec8
commit
11cd0080c3
|
|
@ -0,0 +1,103 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.queue.qscripts.CNV
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.queue.extensions.gatk._
|
||||||
|
import org.broadinstitute.gatk.queue.QScript
|
||||||
|
import org.broadinstitute.gatk.queue.util.VCF_BAM_utilities
|
||||||
|
import org.broadinstitute.gatk.queue.extensions.gatk.DoC._
|
||||||
|
import org.broadinstitute.gatk.utils.commandline._
|
||||||
|
import java.io.{File, PrintStream, PrintWriter}
|
||||||
|
import org.broadinstitute.gatk.utils.text.XReadLines
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.gatk.tools.walkers.coverage.CoverageUtils
|
||||||
|
import org.broadinstitute.gatk.queue.function.scattergather.{CloneFunction, ScatterFunction, GatherFunction, ScatterGatherableFunction}
|
||||||
|
import org.broadinstitute.gatk.queue.function.{CommandLineFunction, InProcessFunction}
|
||||||
|
import org.broadinstitute.gatk.utils.io.IOUtils
|
||||||
|
|
||||||
|
class ONLY_GENOTYPE_xhmmCNVpipeline extends QScript {
|
||||||
|
qscript =>
|
||||||
|
|
||||||
|
@Input(doc = "bam input, as as a list of .bam files, or a list of bam files with sample IDs to be used ( as specified at https://www.broadinstitute.org/gatk/gatkdocs/org_broadinstitute_sting_gatk_CommandLineGATK.html#--sample_rename_mapping_file )", shortName = "I", required = true)
|
||||||
|
var bams: File = _
|
||||||
|
|
||||||
|
@Input(doc = "xhmm executable file", shortName = "xhmmExec", required = true)
|
||||||
|
var xhmmExec: File = _
|
||||||
|
|
||||||
|
@Input(shortName = "R", doc = "ref", required = true)
|
||||||
|
var referenceFile: File = _
|
||||||
|
|
||||||
|
@Argument(doc = "Samples to run together for DoC, CNV discovery, and CNV genotyping. By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false)
|
||||||
|
var samplesPerJob = 1
|
||||||
|
|
||||||
|
@Output(doc = "Base name for files to output", shortName = "o", required = true)
|
||||||
|
var outputBase: File = _
|
||||||
|
|
||||||
|
@Argument(shortName = "xhmmParams", doc = "xhmm model parameters file", required = true)
|
||||||
|
var xhmmParamsArg: File = _
|
||||||
|
|
||||||
|
@Argument(shortName = "genotypeParams", doc = "xhmm command-line parameters for genotyping step", required = false)
|
||||||
|
var genotypeCommandLineParams: String = ""
|
||||||
|
|
||||||
|
@Argument(shortName = "addGenotypeRegions", doc = "Additional interval list files to be genotyped", required = false)
|
||||||
|
var addGenotypeRegions: List[File] = List[File]()
|
||||||
|
|
||||||
|
@Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false)
|
||||||
|
var longJobQueue: String = ""
|
||||||
|
|
||||||
|
@Argument(shortName = "filteredZscored", doc = "File of PCA-normalized read depths, after filtering and Z-score calculation", required = true)
|
||||||
|
var filteredZscored: File = _
|
||||||
|
|
||||||
|
@Argument(shortName = "originalSameFiltered", doc = "File of original read depths, using same filters (samples and targets) as Z-score matrix [filteredZscored argument]", required = true)
|
||||||
|
var originalSameFiltered: File = _
|
||||||
|
|
||||||
|
|
||||||
|
trait LongRunTime extends CommandLineFunction {
|
||||||
|
if (longJobQueue != "")
|
||||||
|
this.jobQueue = longJobQueue
|
||||||
|
}
|
||||||
|
|
||||||
|
def script = {
|
||||||
|
val parseMixedInputBamList = parseBamListWithOptionalSampleMappings(bams)
|
||||||
|
|
||||||
|
val processMixedInputBamList = new ProcessBamListWithOptionalSampleMappings(parseMixedInputBamList, outputBase.getPath)
|
||||||
|
add(processMixedInputBamList)
|
||||||
|
|
||||||
|
val samples: List[String] = parseMixedInputBamList.sampleToBams.keys.toList
|
||||||
|
Console.out.printf("Samples are %s%n", samples)
|
||||||
|
|
||||||
|
val groups: List[Group] = buildDoCgroups(samples, parseMixedInputBamList.sampleToBams, samplesPerJob, outputBase)
|
||||||
|
for (group <- groups) {
|
||||||
|
Console.out.printf("Group is %s%n", group)
|
||||||
|
}
|
||||||
|
|
||||||
|
for (regionsFile <- addGenotypeRegions) {
|
||||||
|
val genotypeRegions = new GenotypeCNVs(filteredZscored, regionsFile, originalSameFiltered, new File(outputBase.getParent + "/" + regionsFile.getName.replace(".interval_list", "") + "." + outputBase.getName), xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime
|
||||||
|
add(genotypeRegions)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
@ -134,6 +134,9 @@ class xhmmCNVpipeline extends QScript {
|
||||||
@Argument(shortName = "subsegmentGenotypeThreshold", doc = "If genotypeSubsegments, this is the default genotype quality threshold for the sub-segments", required = false)
|
@Argument(shortName = "subsegmentGenotypeThreshold", doc = "If genotypeSubsegments, this is the default genotype quality threshold for the sub-segments", required = false)
|
||||||
var subsegmentGenotypeThreshold = 20.0
|
var subsegmentGenotypeThreshold = 20.0
|
||||||
|
|
||||||
|
@Argument(shortName = "addGenotypeRegions", doc = "Additional interval list files to be genotyped", required = false)
|
||||||
|
var addGenotypeRegions: List[File] = List[File]()
|
||||||
|
|
||||||
@Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false)
|
@Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false)
|
||||||
var longJobQueue: String = ""
|
var longJobQueue: String = ""
|
||||||
|
|
||||||
|
|
@ -314,41 +317,7 @@ class xhmmCNVpipeline extends QScript {
|
||||||
val discover = new DiscoverCNVs(filterZscore.filteredZscored, filterOriginal.sameFiltered)
|
val discover = new DiscoverCNVs(filterZscore.filteredZscored, filterOriginal.sameFiltered)
|
||||||
add(discover)
|
add(discover)
|
||||||
|
|
||||||
|
class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".subsegments.vcf", xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) {
|
||||||
abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, outName: String) extends SamplesScatterable(xhmmExec, groups) with LongRunTime {
|
|
||||||
@Input(doc = "")
|
|
||||||
val input = inputParam
|
|
||||||
|
|
||||||
@Input(doc = "")
|
|
||||||
val xhmmParams = xhmmParamsArg
|
|
||||||
|
|
||||||
@Input(doc = "")
|
|
||||||
val origRD = origRDParam
|
|
||||||
|
|
||||||
@Input(doc = "")
|
|
||||||
val inXcnv = xcnv
|
|
||||||
|
|
||||||
@Output
|
|
||||||
@Gather(classOf[MergeVCFsGatherFunction])
|
|
||||||
val vcf: File = new File(outName)
|
|
||||||
|
|
||||||
override def commandLine =
|
|
||||||
xhmmExec + " --genotype" +
|
|
||||||
" -p " + xhmmParams +
|
|
||||||
" -r " + input +
|
|
||||||
" -g " + inXcnv +
|
|
||||||
" -F " + referenceFile +
|
|
||||||
" -R " + origRD +
|
|
||||||
" -v " + vcf +
|
|
||||||
" " + genotypeCommandLineParams +
|
|
||||||
" " + addCommand
|
|
||||||
}
|
|
||||||
|
|
||||||
class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".vcf") {
|
|
||||||
override def description = "Genotypes discovered CNVs in all samples: " + commandLine
|
|
||||||
}
|
|
||||||
|
|
||||||
class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, outputBase.getPath + ".subsegments.vcf") {
|
|
||||||
override def commandLine =
|
override def commandLine =
|
||||||
super.commandLine +
|
super.commandLine +
|
||||||
" --subsegments" +
|
" --subsegments" +
|
||||||
|
|
@ -358,13 +327,19 @@ class xhmmCNVpipeline extends QScript {
|
||||||
override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + commandLine
|
override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + commandLine
|
||||||
}
|
}
|
||||||
|
|
||||||
val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
|
val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered, outputBase, xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime
|
||||||
add(genotype)
|
add(genotype)
|
||||||
|
|
||||||
if (genotypeSubsegments) {
|
if (genotypeSubsegments) {
|
||||||
val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
|
val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered, xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime
|
||||||
add(genotypeSegs)
|
add(genotypeSegs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
addGenotypeRegions :+= prepTargets.out
|
||||||
|
for (regionsFile <- addGenotypeRegions) {
|
||||||
|
val genotypeRegions = new GenotypeCNVs(filterZscore.filteredZscored, regionsFile, filterOriginal.sameFiltered, new File(outputBase.getParent + "/" + regionsFile.getName.replace(".interval_list", "") + "." + outputBase.getName), xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) with LongRunTime
|
||||||
|
add(genotypeRegions)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class ExcludeTargetsBasedOnValue(locus_valueIn : File, outSuffix : String, minVal : Double, maxVal : Double) extends InProcessFunction {
|
class ExcludeTargetsBasedOnValue(locus_valueIn : File, outSuffix : String, minVal : Double, maxVal : Double) extends InProcessFunction {
|
||||||
|
|
@ -531,85 +506,3 @@ class xhmmCNVpipeline extends QScript {
|
||||||
override def description = "Filters original read-depth data to be the same as filtered, normalized data: " + command
|
override def description = "Filters original read-depth data to be the same as filtered, normalized data: " + command
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
abstract class SamplesScatterable(val xhmmExec: File, val groups: List[Group]) extends ScatterGatherableFunction with CommandLineFunction {
|
|
||||||
this.scatterCount = groups.size
|
|
||||||
this.scatterClass = classOf[SamplesScatterFunction]
|
|
||||||
|
|
||||||
@Input(doc = "", required=false)
|
|
||||||
var keepSampleIDs: Option[String] = None
|
|
||||||
|
|
||||||
def addCommand = if (keepSampleIDs.isDefined) ("--keepSampleIDs " + keepSampleIDs.get) else ""
|
|
||||||
}
|
|
||||||
|
|
||||||
class SamplesScatterFunction extends ScatterFunction with InProcessFunction {
|
|
||||||
protected var groups: List[Group] = _
|
|
||||||
override def scatterCount = groups.size
|
|
||||||
|
|
||||||
@Output(doc="Scatter function outputs")
|
|
||||||
var scatterSamples: Seq[File] = Nil
|
|
||||||
|
|
||||||
override def init() {
|
|
||||||
this.groups = this.originalFunction.asInstanceOf[SamplesScatterable].groups
|
|
||||||
}
|
|
||||||
|
|
||||||
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
|
||||||
val scatterPart = IOUtils.absolute(cloneFunction.commandDirectory, "keepSampleIDs.txt")
|
|
||||||
cloneFunction.setFieldValue("keepSampleIDs", Some(scatterPart))
|
|
||||||
this.scatterSamples :+= scatterPart
|
|
||||||
}
|
|
||||||
|
|
||||||
override def run() {
|
|
||||||
if (groups.size != this.scatterSamples.size)
|
|
||||||
throw new Exception("Internal inconsistency error in scattering jobs")
|
|
||||||
|
|
||||||
(groups, this.scatterSamples).zipped foreach {
|
|
||||||
(group, sampsFile) => {
|
|
||||||
val sampsWriter = new PrintWriter(new PrintStream(sampsFile))
|
|
||||||
|
|
||||||
for (samp <- group.samples) {
|
|
||||||
try {
|
|
||||||
sampsWriter.printf("%s%n", samp)
|
|
||||||
}
|
|
||||||
catch {
|
|
||||||
case e: Exception => throw e
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sampsWriter.close
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
trait MergeVCFs extends CommandLineFunction {
|
|
||||||
var xhmmExec: File = _
|
|
||||||
|
|
||||||
@Input(doc = "")
|
|
||||||
var inputVCFs: List[File] = Nil
|
|
||||||
|
|
||||||
@Output
|
|
||||||
var mergedVCF: File = null
|
|
||||||
|
|
||||||
override def commandLine =
|
|
||||||
xhmmExec + " --mergeVCFs" +
|
|
||||||
inputVCFs.map(input => " --mergeVCF " + input).reduceLeft(_ + "" + _) +
|
|
||||||
" -v " + mergedVCF
|
|
||||||
|
|
||||||
override def description = "Combines VCF outputs for multiple samples (at same loci): " + commandLine
|
|
||||||
}
|
|
||||||
|
|
||||||
class MergeVCFsGatherFunction extends MergeVCFs with GatherFunction {
|
|
||||||
override def freezeFieldValues() {
|
|
||||||
super.freezeFieldValues()
|
|
||||||
|
|
||||||
this.xhmmExec = originalFunction.asInstanceOf[SamplesScatterable].xhmmExec
|
|
||||||
|
|
||||||
this.inputVCFs = this.gatherParts.toList
|
|
||||||
this.mergedVCF = this.originalOutput
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
class DummyGatherFunction extends InProcessFunction with GatherFunction {
|
|
||||||
override def run() {}
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,7 @@ import org.broadinstitute.gatk.utils.downsampling.DownsampleType
|
||||||
// due to ongoing bugs with inner classes/objects in package objects:
|
// due to ongoing bugs with inner classes/objects in package objects:
|
||||||
// https://issues.scala-lang.org/browse/SI-4344
|
// https://issues.scala-lang.org/browse/SI-4344
|
||||||
// https://issues.scala-lang.org/browse/SI-5954
|
// https://issues.scala-lang.org/browse/SI-5954
|
||||||
|
|
||||||
class DoC(val bams: List[File], val DoC_output: File, val countType: CoverageUtils.CountPileupType, val MAX_DEPTH: Int, val minMappingQuality: Int, val minBaseQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int], val sampleRenameMappingFile: Option[File] = None) extends CommandLineGATK with ScatterGatherableFunction {
|
class DoC(val bams: List[File], val DoC_output: File, val countType: CoverageUtils.CountPileupType, val MAX_DEPTH: Int, val minMappingQuality: Int, val minBaseQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int], val sampleRenameMappingFile: Option[File] = None) extends CommandLineGATK with ScatterGatherableFunction {
|
||||||
val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary"
|
val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,156 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2012 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.gatk.queue.extensions.gatk
|
||||||
|
|
||||||
|
import org.broadinstitute.gatk.queue.extensions.gatk._
|
||||||
|
import org.broadinstitute.gatk.queue.QScript
|
||||||
|
import org.broadinstitute.gatk.queue.extensions.gatk.DoC._
|
||||||
|
import org.broadinstitute.gatk.utils.commandline._
|
||||||
|
import java.io.{File, PrintStream, PrintWriter}
|
||||||
|
import collection.JavaConversions._
|
||||||
|
import org.broadinstitute.gatk.queue.function.scattergather.{CloneFunction, ScatterFunction, GatherFunction, ScatterGatherableFunction}
|
||||||
|
import org.broadinstitute.gatk.queue.function.{CommandLineFunction, InProcessFunction}
|
||||||
|
import org.broadinstitute.gatk.utils.io.IOUtils
|
||||||
|
|
||||||
|
// Minimal refactor from a package object to a file full of classes/objects
|
||||||
|
// due to ongoing bugs with inner classes/objects in package objects:
|
||||||
|
// https://issues.scala-lang.org/browse/SI-4344
|
||||||
|
// https://issues.scala-lang.org/browse/SI-5954
|
||||||
|
|
||||||
|
abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, outName: String, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends SamplesScatterable(xhmmExec, groups) {
|
||||||
|
@Input(doc = "")
|
||||||
|
val input = inputParam
|
||||||
|
|
||||||
|
@Input(doc = "")
|
||||||
|
val xhmmParams = xhmmParamsArg
|
||||||
|
|
||||||
|
@Input(doc = "")
|
||||||
|
val origRD = origRDParam
|
||||||
|
|
||||||
|
@Input(doc = "")
|
||||||
|
val inXcnv = xcnv
|
||||||
|
|
||||||
|
@Output
|
||||||
|
@Gather(classOf[MergeVCFsGatherFunction])
|
||||||
|
val vcf: File = new File(outName)
|
||||||
|
|
||||||
|
override def commandLine =
|
||||||
|
xhmmExec + " --genotype" +
|
||||||
|
" -p " + xhmmParams +
|
||||||
|
" -r " + input +
|
||||||
|
" -g " + inXcnv +
|
||||||
|
" -F " + referenceFile +
|
||||||
|
" -R " + origRD +
|
||||||
|
" -v " + vcf +
|
||||||
|
" " + genotypeCommandLineParams +
|
||||||
|
" " + addCommand
|
||||||
|
}
|
||||||
|
|
||||||
|
class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File, genotypeOutputBase: File, xhmmParamsArg: File, referenceFile: File, genotypeCommandLineParams: String, xhmmExec: File, groups: List[Group]) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam, genotypeOutputBase.getPath + ".vcf", xhmmParamsArg, referenceFile, genotypeCommandLineParams, xhmmExec, groups) {
|
||||||
|
override def description = "Genotypes CNV regions in all samples: " + commandLine
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
abstract class SamplesScatterable(val xhmmExec: File, val groups: List[Group]) extends ScatterGatherableFunction with CommandLineFunction {
|
||||||
|
this.scatterCount = groups.size
|
||||||
|
this.scatterClass = classOf[SamplesScatterFunction]
|
||||||
|
|
||||||
|
@Input(doc = "", required=false)
|
||||||
|
var keepSampleIDs: Option[String] = None
|
||||||
|
|
||||||
|
def addCommand = if (keepSampleIDs.isDefined) ("--keepSampleIDs " + keepSampleIDs.get) else ""
|
||||||
|
}
|
||||||
|
|
||||||
|
class SamplesScatterFunction extends ScatterFunction with InProcessFunction {
|
||||||
|
protected var groups: List[Group] = _
|
||||||
|
override def scatterCount = groups.size
|
||||||
|
|
||||||
|
@Output(doc="Scatter function outputs")
|
||||||
|
var scatterSamples: Seq[File] = Nil
|
||||||
|
|
||||||
|
override def init() {
|
||||||
|
this.groups = this.originalFunction.asInstanceOf[SamplesScatterable].groups
|
||||||
|
}
|
||||||
|
|
||||||
|
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||||
|
val scatterPart = IOUtils.absolute(cloneFunction.commandDirectory, "keepSampleIDs.txt")
|
||||||
|
cloneFunction.setFieldValue("keepSampleIDs", Some(scatterPart))
|
||||||
|
this.scatterSamples :+= scatterPart
|
||||||
|
}
|
||||||
|
|
||||||
|
override def run() {
|
||||||
|
if (groups.size != this.scatterSamples.size)
|
||||||
|
throw new Exception("Internal inconsistency error in scattering jobs")
|
||||||
|
|
||||||
|
(groups, this.scatterSamples).zipped foreach {
|
||||||
|
(group, sampsFile) => {
|
||||||
|
val sampsWriter = new PrintWriter(new PrintStream(sampsFile))
|
||||||
|
|
||||||
|
for (samp <- group.samples) {
|
||||||
|
try {
|
||||||
|
sampsWriter.printf("%s%n", samp)
|
||||||
|
}
|
||||||
|
catch {
|
||||||
|
case e: Exception => throw e
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sampsWriter.close
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
trait MergeVCFs extends CommandLineFunction {
|
||||||
|
var xhmmExec: File = _
|
||||||
|
|
||||||
|
@Input(doc = "")
|
||||||
|
var inputVCFs: List[File] = Nil
|
||||||
|
|
||||||
|
@Output
|
||||||
|
var mergedVCF: File = null
|
||||||
|
|
||||||
|
override def commandLine =
|
||||||
|
xhmmExec + " --mergeVCFs" +
|
||||||
|
inputVCFs.map(input => " --mergeVCF " + input).reduceLeft(_ + "" + _) +
|
||||||
|
" -v " + mergedVCF
|
||||||
|
|
||||||
|
override def description = "Combines VCF outputs for multiple samples (at same loci): " + commandLine
|
||||||
|
}
|
||||||
|
|
||||||
|
class MergeVCFsGatherFunction extends MergeVCFs with GatherFunction {
|
||||||
|
override def freezeFieldValues() {
|
||||||
|
super.freezeFieldValues()
|
||||||
|
|
||||||
|
this.xhmmExec = originalFunction.asInstanceOf[SamplesScatterable].xhmmExec
|
||||||
|
|
||||||
|
this.inputVCFs = this.gatherParts.toList
|
||||||
|
this.mergedVCF = this.originalOutput
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
class DummyGatherFunction extends InProcessFunction with GatherFunction {
|
||||||
|
override def run() {}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue