Use an embedded version of Picard to merge un-indexed BAM files after scatter/gather, instead of requiring the QScripts to specify the Picard JAR. The same may be done for the GATK JAR too.

Fixed the initialization of the pending counts when using -startFromScratch, so that the count no longer starts at zero and ends at -<#njobs>.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5483 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
kshakir 2011-03-21 18:20:01 +00:00
parent 8a0e813b04
commit f6d4b0aaf5
9 changed files with 186 additions and 44 deletions

View File

@ -15,6 +15,9 @@
<!-- Scala -->
<package name="scala.**" />
<!-- Picard -->
<package name="net.sf.picard.**" />
<!-- JNA, including embedded native libraries -->
<dir name="com/sun/jna" />
</dependencies>

View File

@ -1,7 +1,5 @@
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk.{RealignerTargetCreator, RodBind, IndelRealigner}
import org.broadinstitute.sting.commandline.ArgumentSource
import org.broadinstitute.sting.queue.extensions.gatk.BamGatherFunction
/**
* Created by IntelliJ IDEA.
@ -69,13 +67,6 @@ class justClean extends QScript {
clean.memoryLimit = Some(6)
clean.scatterCount = 84
clean.setupGatherFunction = {
case (gather: BamGatherFunction, source: ArgumentSource) =>
gather.memoryLimit = Some(6) // Memory limit you expect for the job
gather.jarFile = new File("/seq/software/picard/current/bin/MergeSamFiles.jar")
}
add(clean);
}
}

View File

@ -1,7 +1,7 @@
import org.broadinstitute.sting.commandline.ArgumentSource
import org.broadinstitute.sting.datasources.pipeline.Pipeline
import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
import org.broadinstitute.sting.queue.extensions.picard.PicardBamEmbeddedFunction
import org.broadinstitute.sting.queue.extensions.samtools._
import org.broadinstitute.sting.queue.function.ListWriterFunction
import org.broadinstitute.sting.queue.function.scattergather.{GatherFunction, CloneFunction, ScatterFunction}
@ -16,9 +16,6 @@ class FullCallingPipeline extends QScript {
@Argument(doc="the YAML file specifying inputs, interval lists, reference sequence, etc.", shortName="Y")
var yamlFile: File = _
@Input(doc="path to Picard FixMateInformation.jar. See http://picard.sourceforge.net/ .", shortName="P", required=false)
var picardFixMatesJar: File = new java.io.File("/seq/software/picard/current/bin/FixMateInformation.jar")
@Input(doc="path to GATK jar", shortName="G")
var gatkJar: File = _
@ -39,6 +36,8 @@ class FullCallingPipeline extends QScript {
private var pipeline: Pipeline = _
private final val picardFixMatesClass = "net.sf.picard.sam.FixMateInformation"
trait CommandLineGATKArgs extends CommandLineGATK {
this.intervals = List(qscript.pipeline.getProject.getIntervalList)
this.jarFile = qscript.gatkJar
@ -110,7 +109,7 @@ class FullCallingPipeline extends QScript {
gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName))
gather.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
gather.memoryLimit = Some(6)
gather.jarFile = qscript.picardFixMatesJar
gather.mainClass = picardFixMatesClass
gather.assumeSorted = None
case (gather: GatherFunction, source: ArgumentSource) =>
gather.commandDirectory = new File("CleanedBams/IntermediateFiles/%s/ScatterGather/Gather_%s".format(sampleId, source.field.getName))
@ -123,7 +122,7 @@ class FullCallingPipeline extends QScript {
realigner.isIntermediate = true
// Explicitly run fix mates if the function won't be scattered.
val fixMates = new PicardBamJarFunction {
val fixMates = new PicardBamEmbeddedFunction {
@Input(doc="unfixed bam") var unfixed: File = _
@Output(doc="fixed bam") var fixed: File = _
def inputBams = List(unfixed)
@ -132,7 +131,7 @@ class FullCallingPipeline extends QScript {
fixMates.jobOutputFile = new File(".queue/logs/Cleaning/%s/FixMates.out".format(sampleId))
fixMates.memoryLimit = Some(6)
fixMates.jarFile = qscript.picardFixMatesJar
fixMates.mainClass = picardFixMatesClass
fixMates.unfixed = realigner.out
fixMates.fixed = cleaned_bam
fixMates.analysisName = "FixMates_"+sampleId

View File

@ -310,11 +310,10 @@ class QGraph extends Logging {
* Dry-runs the jobs by traversing the graph.
*/
private def dryRunJobs() {
if (settings.startFromScratch) {
if (settings.startFromScratch)
logger.info("Will remove outputs from previous runs.")
foreachFunction(_.resetToPending(false))
} else
updateGraphStatus(false)
updateGraphStatus(false)
var readyJobs = getReadyJobs()
while (running && readyJobs.size > 0) {
@ -361,11 +360,10 @@ class QGraph extends Logging {
settings.jobRunner = "Shell"
commandLineManager = commandLinePluginManager.createByName(settings.jobRunner)
if (settings.startFromScratch) {
if (settings.startFromScratch)
logger.info("Removing outputs from previous runs.")
foreachFunction(_.resetToPending(true))
} else
updateGraphStatus(true)
updateGraphStatus(true)
var readyJobs = TreeSet.empty[FunctionEdge](functionOrdering)
readyJobs ++= getReadyJobs()
@ -458,7 +456,10 @@ class QGraph extends Logging {
* @param cleanOutputs If true will delete outputs when setting edges to pending.
*/
private def updateGraphStatus(cleanOutputs: Boolean) {
traverseFunctions(edge => checkDone(edge, cleanOutputs))
if (settings.startFromScratch)
foreachFunction(edge => edge.resetToPending(cleanOutputs))
else
traverseFunctions(edge => checkDone(edge, cleanOutputs))
traverseFunctions(edge => recheckDone(edge))
}

View File

@ -1,14 +1,19 @@
package org.broadinstitute.sting.queue.extensions.gatk
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
import org.broadinstitute.sting.queue.extensions.picard.PicardBamEmbeddedFunction
/**
* Merges BAM files using Picards MergeSamFiles.jar.
* At the Broad the jar can be found at /seq/software/picard/current/bin/MergeSamFiles.jar. Outside the broad see http://picard.sourceforge.net/")
* Merges BAM files using Picard's net.sf.picard.sam.MergeSamFiles.
*/
class BamGatherFunction extends GatherFunction with PicardBamJarFunction {
class BamGatherFunction extends GatherFunction with PicardBamEmbeddedFunction {
this.mainClass = "net.sf.picard.sam.MergeSamFiles"
this.assumeSorted = Some(true)
protected def inputBams = gatherParts
protected def outputBam = originalOutput
override def init() {
// Whatever the original function can handle, merging *should* do less.
this.memoryLimit = originalFunction.memoryLimit
}
}

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.queue.function.EmbeddedCommandLineFunction
/**
 * A Picard BAM tool that is started as a main class from the current
 * classpath.
 * See http://picard.sourceforge.net/ for more info.
 *
 * This trait adds no members of its own: it stacks the Picard BAM arguments
 * from PicardBamFunction on top of EmbeddedCommandLineFunction. The mixin
 * order matters because PicardBamFunction appends to super.commandLine.
 */
trait PicardBamEmbeddedFunction
  extends EmbeddedCommandLineFunction
  with PicardBamFunction

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.picard
import java.io.File
import org.broadinstitute.sting.queue.function.CommandLineFunction
/**
 * Shared settings and arguments for Picard command lines that work on BAM files.
 * See http://picard.sourceforge.net/ for more info.
 *
 * The arguments assembled here are appended to the command line produced by
 * the function this trait is stacked on. The individual BAM utilities accept
 * slightly different arguments, which is why several settings are optional.
 */
trait PicardBamFunction extends CommandLineFunction {
  // Emitted as VALIDATION_STRINGENCY.
  var validationStringency: String = "SILENT"
  // Emitted as SO.
  var sortOrder: String = "coordinate"
  // Emitted as COMPRESSION_LEVEL.
  var compressionLevel: Option[Int] = None
  // Emitted as MAX_RECORDS_IN_RAM.
  var maxRecordsInRam: Option[Int] = None
  // Emitted as ASSUME_SORTED.
  var assumeSorted: Option[Boolean] = None

  /** The BAM files passed to the tool, one INPUT= argument per file. */
  protected def inputBams: List[File]

  /** The BAM file passed to the tool as OUTPUT=. */
  protected def outputBam: File

  /**
   * The command line of the underlying function followed by the Picard arguments.
   */
  abstract override def commandLine = {
    // Evaluate the wrapped command line first, as the original expression did.
    val baseCommand = super.commandLine
    val picardArguments = List(
      optional(" COMPRESSION_LEVEL=", compressionLevel),
      optional(" VALIDATION_STRINGENCY=", validationStringency),
      optional(" SO=", sortOrder),
      optional( " MAX_RECORDS_IN_RAM=", maxRecordsInRam),
      optional(" ASSUME_SORTED=", assumeSorted),
      " OUTPUT=" + outputBam,
      repeat(" INPUT=", inputBams),
      " TMP_DIR=" + jobTempDir)
    baseCommand + picardArguments.mkString
  }
}

View File

@ -1,7 +1,30 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.queue.function.JarCommandLineFunction
import java.io.File
/**
* Wraps a Picard jar that operates on BAM files.
@ -10,18 +33,5 @@ import java.io.File
* Since the jar files take slightly different arguments
* some values are optional.
*/
trait PicardBamJarFunction extends JarCommandLineFunction {
var validationStringency = "SILENT"
var sortOrder = "coordinate"
var compressionLevel: Option[Int] = None
var maxRecordsInRam: Option[Int] = None
var assumeSorted: Option[Boolean] = None
protected def inputBams: List[File]
protected def outputBam: File
override def commandLine = super.commandLine + "%s%s%s%s%s%s%s%s".format(
optional(" COMPRESSION_LEVEL=", compressionLevel), optional(" VALIDATION_STRINGENCY=", validationStringency),
optional(" SO=", sortOrder), optional( " MAX_RECORDS_IN_RAM=", maxRecordsInRam), optional(" ASSUME_SORTED=", assumeSorted),
" OUTPUT=" + outputBam, repeat(" INPUT=", inputBams), " TMP_DIR=" + jobTempDir)
trait PicardBamJarFunction extends JarCommandLineFunction with PicardBamFunction {
}

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.function
import java.io.File
import org.broadinstitute.sting.queue.util.IOUtils
import org.broadinstitute.sting.commandline.Argument
/**
 * A java command line function whose main class is loaded from the classpath
 * of the currently running JVM.
 */
trait EmbeddedCommandLineFunction extends JavaCommandLineFunction {
  @Argument(doc="Main class to run from the current classpath")
  var mainClass: String = null

  /**
   * The "-cp" option with the classpath computed by the companion object,
   * followed by the main class to run.
   */
  def javaExecutable = "-cp " + EmbeddedCommandLineFunction.classpath + " " + mainClass
}
object EmbeddedCommandLineFunction {
  /**
   * The "java.class.path" system property with every entry made absolute and
   * individually wrapped in double quotes, joined by the platform path separator.
   */
  private val classpath: String = {
    val quote = "\""
    val rawEntries = System.getProperty("java.class.path").split(File.pathSeparatorChar)
    val absoluteEntries = rawEntries.map(entry => IOUtils.absolute(new File(entry)))
    // The opening and closing quotes are supplied by mkString, so they are emitted even for an empty entry list.
    absoluteEntries.mkString(quote, quote + File.pathSeparator + quote, quote)
  }
}