Want to get this into Ryan's hands asap: First working version of a distributed scatter function.
More refactoring to do so that other new scatter functions can be implemented very easily and annotated on walkers. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5363 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
0181d95fe4
commit
a0309e7fb0
|
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.queue.extensions.gatk
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import org.broadinstitute.sting.queue.function.scattergather.CloneFunction
|
||||||
|
import org.broadinstitute.sting.queue.function.InProcessFunction
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A scatter function that uses the Distributed GATK.
|
||||||
|
*/
|
||||||
|
class DistributedScatterFunction extends GATKScatterFunction with InProcessFunction {
|
||||||
|
private final val processingTracker = "processingTracker"
|
||||||
|
|
||||||
|
this.scatterOutputFiles = List(new File(processingTracker))
|
||||||
|
|
||||||
|
override def initCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||||
|
cloneFunction.setFieldValue("processingTracker", new File(this.commandDirectory, this.processingTracker))
|
||||||
|
}
|
||||||
|
|
||||||
|
override def bindCloneInputs(cloneFunction: CloneFunction, index: Int) {
|
||||||
|
/* no further work needed after init. */
|
||||||
|
}
|
||||||
|
|
||||||
|
def run() {
|
||||||
|
/* doesn't actually need to run. */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -28,9 +28,8 @@ import org.broadinstitute.sting.utils.interval.IntervalUtils
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.queue.util.IOUtils
|
import org.broadinstitute.sting.queue.util.IOUtils
|
||||||
import org.broadinstitute.sting.queue.function.QFunction
|
|
||||||
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterGatherableFunction, ScatterFunction}
|
import org.broadinstitute.sting.queue.function.scattergather.{CloneFunction, ScatterGatherableFunction, ScatterFunction}
|
||||||
import org.broadinstitute.sting.commandline.{Output, ArgumentSource}
|
import org.broadinstitute.sting.commandline.Output
|
||||||
|
|
||||||
trait GATKScatterFunction extends ScatterFunction {
|
trait GATKScatterFunction extends ScatterFunction {
|
||||||
/** The total number of clone jobs that will be created. */
|
/** The total number of clone jobs that will be created. */
|
||||||
|
|
@ -40,10 +39,10 @@ trait GATKScatterFunction extends ScatterFunction {
|
||||||
protected var referenceSequence: File = _
|
protected var referenceSequence: File = _
|
||||||
|
|
||||||
/** The runtime field to set for specifying an interval file. */
|
/** The runtime field to set for specifying an interval file. */
|
||||||
protected var intervalsField: ArgumentSource = _
|
private final val intervalsField = "intervals"
|
||||||
|
|
||||||
/** The runtime field to set for specifying an interval string. */
|
/** The runtime field to set for specifying an interval string. */
|
||||||
protected var intervalsStringField: ArgumentSource = _
|
private final val intervalsStringField = "intervalsString"
|
||||||
|
|
||||||
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
|
/** The list of interval files ("/path/to/interval.list") or interval strings ("chr1", "chr2") to parse into smaller parts. */
|
||||||
protected var intervals: List[String] = Nil
|
protected var intervals: List[String] = Nil
|
||||||
|
|
@ -60,22 +59,19 @@ trait GATKScatterFunction extends ScatterFunction {
|
||||||
* @return true if the function is a GATK function with the reference sequence set.
|
* @return true if the function is a GATK function with the reference sequence set.
|
||||||
* @throws IllegalArgumentException if -BTI is set without -BTIMR. QScripts that scatter gather with -BTI must also set -BTIMR (recommended: INTERSECTION).
|
* @throws IllegalArgumentException if -BTI is set without -BTIMR. QScripts that scatter gather with -BTI must also set -BTIMR (recommended: INTERSECTION).
|
||||||
*/
|
*/
|
||||||
def isScatterGatherable(originalFunction: ScatterGatherableFunction): Boolean = {
|
override def isScatterGatherable(originalFunction: ScatterGatherableFunction): Boolean = {
|
||||||
if (originalFunction.isInstanceOf[CommandLineGATK]) {
|
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
||||||
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
if (gatk.BTI != null && gatk.BTIMR == null)
|
||||||
if ( gatk.BTI != null && gatk.BTIMR == null) throw new IllegalArgumentException("BTI requires BTIMR for use with scatter-gather (recommended: INTERSECTION)")
|
throw new IllegalArgumentException("BTI requires BTIMR for use with scatter-gather (recommended: INTERSECTION)")
|
||||||
gatk.reference_sequence != null
|
gatk.reference_sequence != null
|
||||||
} else false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the scatter gatherable function.
|
* Sets the scatter gatherable function.
|
||||||
* @param originalFunction Function to bind.
|
* @param originalFunction Function to bind.
|
||||||
*/
|
*/
|
||||||
def setScatterGatherable(originalFunction: ScatterGatherableFunction) = {
|
override def setScatterGatherable(originalFunction: ScatterGatherableFunction) = {
|
||||||
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
val gatk = originalFunction.asInstanceOf[CommandLineGATK]
|
||||||
this.intervalsField = QFunction.findField(originalFunction.getClass, "intervals")
|
|
||||||
this.intervalsStringField = QFunction.findField(originalFunction.getClass, "intervalsString")
|
|
||||||
this.referenceSequence = gatk.reference_sequence
|
this.referenceSequence = gatk.reference_sequence
|
||||||
if (gatk.intervals.isEmpty && gatk.intervalsString.isEmpty) {
|
if (gatk.intervals.isEmpty && gatk.intervalsString.isEmpty) {
|
||||||
this.intervals ++= IntervalUtils.distinctContigs(this.referenceSequence).toList
|
this.intervals ++= IntervalUtils.distinctContigs(this.referenceSequence).toList
|
||||||
|
|
|
||||||
|
|
@ -1,47 +1,31 @@
|
||||||
package org.broadinstitute.sting.queue.extensions.gatk
|
package org.broadinstitute.sting.queue.extensions.gatk
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.function.InProcessFunction
|
import org.broadinstitute.sting.queue.function.scattergather.{ScatterGatherableFunction, GatherFunction}
|
||||||
import org.broadinstitute.sting.queue.QException
|
|
||||||
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
|
|
||||||
import java.io.{FileReader, PrintWriter}
|
|
||||||
import org.apache.commons.io.{LineIterator, IOUtils, FileUtils}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Merges a vcf text file.
|
* Merges a vcf text file.
|
||||||
*/
|
*/
|
||||||
class VcfGatherFunction extends GatherFunction with InProcessFunction {
|
class VcfGatherFunction extends CombineVariants with GatherFunction {
|
||||||
def run() = {
|
|
||||||
waitForGatherParts
|
|
||||||
if (gatherParts.size < 1) {
|
|
||||||
throw new QException("No files to gather to output: " + originalOutput)
|
|
||||||
} else {
|
|
||||||
val writer = new PrintWriter(originalOutput)
|
|
||||||
try {
|
|
||||||
var reader = new FileReader(gatherParts(0))
|
|
||||||
try {
|
|
||||||
IOUtils.copy(reader, writer)
|
|
||||||
} finally {
|
|
||||||
IOUtils.closeQuietly(reader)
|
|
||||||
}
|
|
||||||
|
|
||||||
for (file <- gatherParts.tail) {
|
private var originalGATK: CommandLineGATK = _
|
||||||
var inHeaders = true
|
|
||||||
val itor = FileUtils.lineIterator(file)
|
override def setScatterGatherable(originalFunction: ScatterGatherableFunction) {
|
||||||
try {
|
this.originalGATK = originalFunction.asInstanceOf[CommandLineGATK]
|
||||||
while (itor.hasNext) {
|
|
||||||
val nextLine = itor.nextLine
|
|
||||||
if (inHeaders && nextLine(0) != '#')
|
|
||||||
inHeaders = false
|
|
||||||
if (!inHeaders)
|
|
||||||
writer.println(nextLine)
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
LineIterator.closeQuietly(itor)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} finally {
|
|
||||||
IOUtils.closeQuietly(writer)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
override def freezeFieldValues = {
|
||||||
|
this.memoryLimit = Some(1)
|
||||||
|
|
||||||
|
this.jarFile = this.originalGATK.jarFile
|
||||||
|
this.reference_sequence = this.originalGATK.reference_sequence
|
||||||
|
this.intervals = this.originalGATK.intervals
|
||||||
|
this.intervalsString = this.originalGATK.intervalsString
|
||||||
|
|
||||||
|
this.rodBind = this.gatherParts.zipWithIndex map { case (input, index) => new RodBind("input"+index, "VCF", input) }
|
||||||
|
this.rod_priority_list = (0 until this.gatherParts.size).map("input"+_).mkString(",")
|
||||||
|
this.out = this.originalOutput
|
||||||
|
this.assumeIdenticalSamples = true
|
||||||
|
|
||||||
|
super.freezeFieldValues
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
package org.broadinstitute.sting.queue.function.scattergather
|
package org.broadinstitute.sting.queue.function.scattergather
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.function.CommandLineFunction
|
|
||||||
import org.broadinstitute.sting.commandline.ArgumentSource
|
import org.broadinstitute.sting.commandline.ArgumentSource
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import org.broadinstitute.sting.queue.QException
|
import org.broadinstitute.sting.queue.QException
|
||||||
|
import org.broadinstitute.sting.queue.function.{QFunction, CommandLineFunction}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Shadow clones another command line function.
|
* Shadow clones another command line function.
|
||||||
|
|
@ -47,7 +47,12 @@ class CloneFunction extends CommandLineFunction {
|
||||||
|
|
||||||
def commandLine = withScatterPart(() => originalFunction.commandLine)
|
def commandLine = withScatterPart(() => originalFunction.commandLine)
|
||||||
|
|
||||||
override def getFieldValue(source: ArgumentSource) = {
|
def getFieldValue(field: String): AnyRef = {
|
||||||
|
val source = QFunction.findField(originalFunction.getClass, field)
|
||||||
|
getFieldValue(source)
|
||||||
|
}
|
||||||
|
|
||||||
|
override def getFieldValue(source: ArgumentSource): AnyRef = {
|
||||||
source.field.getName match {
|
source.field.getName match {
|
||||||
case "jobOutputFile" => jobOutputFile
|
case "jobOutputFile" => jobOutputFile
|
||||||
case "jobErrorFile" => jobErrorFile
|
case "jobErrorFile" => jobErrorFile
|
||||||
|
|
@ -62,6 +67,11 @@ class CloneFunction extends CommandLineFunction {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def setFieldValue(field: String, value: Any): Unit = {
|
||||||
|
val source = QFunction.findField(originalFunction.getClass, field)
|
||||||
|
setFieldValue(source, value)
|
||||||
|
}
|
||||||
|
|
||||||
override def setFieldValue(source: ArgumentSource, value: Any): Unit = {
|
override def setFieldValue(source: ArgumentSource, value: Any): Unit = {
|
||||||
source.field.getName match {
|
source.field.getName match {
|
||||||
case "jobOutputFile" => jobOutputFile = value.asInstanceOf[File]
|
case "jobOutputFile" => jobOutputFile = value.asInstanceOf[File]
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,12 @@ trait GatherFunction extends QFunction {
|
||||||
@Output(doc="The original output of the scattered function")
|
@Output(doc="The original output of the scattered function")
|
||||||
var originalOutput: File = _
|
var originalOutput: File = _
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the original ScatterGatherableFunction to be gathered.
|
||||||
|
* @param originalFunction The original function whose output is gathered by this gather function.
|
||||||
|
*/
|
||||||
|
def setScatterGatherable(originalFunction: ScatterGatherableFunction) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Waits for gather parts to propagate over NFS or throws an exception.
|
* Waits for gather parts to propagate over NFS or throws an exception.
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -16,13 +16,13 @@ trait ScatterFunction extends QFunction {
|
||||||
* @param originalFunction The original function to check.
|
* @param originalFunction The original function to check.
|
||||||
* @return true if the scatter function can scatter this original function.
|
* @return true if the scatter function can scatter this original function.
|
||||||
*/
|
*/
|
||||||
def isScatterGatherable(originalFunction: ScatterGatherableFunction): Boolean
|
def isScatterGatherable(originalFunction: ScatterGatherableFunction) = true
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets the original ScatterGatherableFunction to be scattered.
|
* Sets the original ScatterGatherableFunction to be scattered.
|
||||||
* @param originalFunction The original function with inputs to bind to this scatter function.
|
* @param originalFunction The original function with inputs to bind to this scatter function.
|
||||||
*/
|
*/
|
||||||
def setScatterGatherable(originalFunction: ScatterGatherableFunction)
|
def setScatterGatherable(originalFunction: ScatterGatherableFunction) {}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* After a call to setScatterGatherable(), returns the number of clones that should be created.
|
* After a call to setScatterGatherable(), returns the number of clones that should be created.
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
||||||
scatterFunction.isIntermediate = true
|
scatterFunction.isIntermediate = true
|
||||||
scatterFunction.setScatterGatherable(this)
|
scatterFunction.setScatterGatherable(this)
|
||||||
initScatterFunction(scatterFunction)
|
initScatterFunction(scatterFunction)
|
||||||
|
scatterFunction.absoluteCommandDirectory()
|
||||||
functions :+= scatterFunction
|
functions :+= scatterFunction
|
||||||
|
|
||||||
// Ask the scatter function how many clones to create.
|
// Ask the scatter function how many clones to create.
|
||||||
|
|
@ -98,7 +99,9 @@ trait ScatterGatherableFunction extends CommandLineFunction {
|
||||||
gatherFunction.addOrder = this.addOrder :+ gatherAddOrder
|
gatherFunction.addOrder = this.addOrder :+ gatherAddOrder
|
||||||
gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName)
|
gatherFunction.commandDirectory = this.scatterGatherTempDir("gather-" + gatherField.field.getName)
|
||||||
gatherFunction.originalOutput = gatherOutput
|
gatherFunction.originalOutput = gatherOutput
|
||||||
|
gatherFunction.setScatterGatherable(this)
|
||||||
initGatherFunction(gatherFunction, gatherField)
|
initGatherFunction(gatherFunction, gatherField)
|
||||||
|
gatherFunction.absoluteCommandDirectory()
|
||||||
functions :+= gatherFunction
|
functions :+= gatherFunction
|
||||||
gatherFunctions += gatherField -> gatherFunction
|
gatherFunctions += gatherField -> gatherFunction
|
||||||
gatherOutputs += gatherField -> gatherOutput
|
gatherOutputs += gatherField -> gatherOutput
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue