Bam gathering passes on the compression_level and the create_index flag to MergeSamFiles.
VCF gathering passes on the no_header and sites_only flags to CombineVariants. Fixed deletion of gathered log files. Although they are intermediate and do not need to be re-run if not present, they should not be deleted. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5508 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
47279ee56e
commit
3e3ff4a9e7
|
|
@ -42,13 +42,13 @@ import java.io.OutputStream;
|
|||
* Insert a SAMFileWriterStub instead of a full-fledged concrete OutputStream implementations.
|
||||
*/
|
||||
public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
private static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM";
|
||||
private static final String DEFAULT_ARGUMENT_SHORTNAME = "ob";
|
||||
public static final String DEFAULT_ARGUMENT_FULLNAME = "outputBAM";
|
||||
public static final String DEFAULT_ARGUMENT_SHORTNAME = "ob";
|
||||
|
||||
private static final String COMPRESSION_FULLNAME = "bam_compression";
|
||||
private static final String COMPRESSION_SHORTNAME = "compress";
|
||||
public static final String COMPRESSION_FULLNAME = "bam_compression";
|
||||
public static final String COMPRESSION_SHORTNAME = "compress";
|
||||
|
||||
private static final String CREATE_INDEX_FULLNAME = "index_output_bam_on_the_fly";
|
||||
public static final String CREATE_INDEX_FULLNAME = "index_output_bam_on_the_fly";
|
||||
|
||||
/**
|
||||
* The engine into which output stubs should be fed.
|
||||
|
|
|
|||
|
|
@ -41,9 +41,9 @@ import java.util.*;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
private static final String NO_HEADER_ARG_NAME = "NO_HEADER";
|
||||
private static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
private static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||
public static final String NO_HEADER_ARG_NAME = "NO_HEADER";
|
||||
public static final String SITES_ONLY_ARG_NAME = "sites_only";
|
||||
public static final HashSet<String> SUPPORTED_ZIPPED_SUFFIXES = new HashSet<String>();
|
||||
|
||||
//
|
||||
// static list of zipped suffixes supported by this system.
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ class QGraph extends Logging {
|
|||
private var running = true
|
||||
private val runningLock = new Object
|
||||
private var runningJobs = Set.empty[FunctionEdge]
|
||||
private var intermediatesJobs = Set.empty[FunctionEdge]
|
||||
private var cleanupJobs = Set.empty[FunctionEdge]
|
||||
|
||||
private val nl = "%n".format()
|
||||
|
||||
|
|
@ -394,7 +394,7 @@ class QGraph extends Logging {
|
|||
logStatusCounts
|
||||
logNextStatusCounts = false
|
||||
|
||||
deleteDoneIntermediates(lastRunningCheck)
|
||||
deleteCleanup(lastRunningCheck)
|
||||
|
||||
if (readyJobs.size == 0 && runningJobs.size > 0)
|
||||
Thread.sleep(nextRunningCheck(lastRunningCheck))
|
||||
|
|
@ -410,8 +410,8 @@ class QGraph extends Logging {
|
|||
|
||||
runningJobs --= doneJobs
|
||||
runningJobs --= failedJobs
|
||||
if (!settings.keepIntermediates)
|
||||
intermediatesJobs ++= doneJobs.filter(_.function.isIntermediate)
|
||||
|
||||
addCleanup(doneJobs)
|
||||
|
||||
statusCounts.running -= doneJobs.size
|
||||
statusCounts.running -= failedJobs.size
|
||||
|
|
@ -430,7 +430,7 @@ class QGraph extends Logging {
|
|||
}
|
||||
|
||||
logStatusCounts
|
||||
deleteDoneIntermediates(-1)
|
||||
deleteCleanup(-1)
|
||||
} catch {
|
||||
case e =>
|
||||
logger.error("Uncaught error running jobs.", e)
|
||||
|
|
@ -503,19 +503,36 @@ class QGraph extends Logging {
|
|||
|
||||
if (edge.status == RunnerStatus.DONE || edge.status == RunnerStatus.SKIPPED) {
|
||||
logger.debug("Already done: " + edge.function.description)
|
||||
if (!settings.keepIntermediates && edge.function.isIntermediate)
|
||||
intermediatesJobs += edge
|
||||
addCleanup(edge)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the functions should have their outputs removed after they finish running.
|
||||
* @param edges Functions to check
|
||||
*/
|
||||
private def addCleanup(edges: Traversable[FunctionEdge]) {
|
||||
edges.foreach(addCleanup(_))
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the function should have its outputs removed after it finishes running.
|
||||
* @param edge Function to check
|
||||
*/
|
||||
private def addCleanup(edge: FunctionEdge) {
|
||||
if (!settings.keepIntermediates)
|
||||
if (edge.function.isIntermediate && edge.function.deleteIntermediateOutputs)
|
||||
cleanupJobs += edge
|
||||
}
|
||||
|
||||
/**
|
||||
* Continues deleting the outputs of intermediate jobs that are no longer needed until it's time to recheck running status.
|
||||
* @param lastRunningCheck The last time the status was checked.
|
||||
*/
|
||||
private def deleteDoneIntermediates(lastRunningCheck: Long) {
|
||||
private def deleteCleanup(lastRunningCheck: Long) {
|
||||
var doneJobs = Set.empty[FunctionEdge]
|
||||
|
||||
for (edge <- intermediatesJobs) {
|
||||
for (edge <- cleanupJobs) {
|
||||
val nextDone = nextFunctions(edge).forall(next => {
|
||||
val status = next.status
|
||||
(status == RunnerStatus.DONE || status == RunnerStatus.SKIPPED)
|
||||
|
|
@ -529,7 +546,7 @@ class QGraph extends Logging {
|
|||
if (running && !readyRunningCheck(lastRunningCheck)) {
|
||||
logger.debug("Deleting intermediates:" + edge.function.description)
|
||||
edge.function.deleteOutputs()
|
||||
intermediatesJobs -= edge
|
||||
cleanupJobs -= edge
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,11 @@ package org.broadinstitute.sting.queue.extensions.gatk
|
|||
|
||||
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction
|
||||
import org.broadinstitute.sting.queue.function.QFunction
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor
|
||||
|
||||
/**
|
||||
* Merges BAM files using Picards net.sf.picard.sam.MergeSamFiles.
|
||||
* Merges BAM files using net.sf.picard.sam.MergeSamFiles.
|
||||
*/
|
||||
class BamGatherFunction extends GatherFunction with PicardBamFunction {
|
||||
this.javaMainClass = "net.sf.picard.sam.MergeSamFiles"
|
||||
|
|
@ -36,8 +38,21 @@ class BamGatherFunction extends GatherFunction with PicardBamFunction {
|
|||
protected def inputBams = gatherParts
|
||||
protected def outputBam = originalOutput
|
||||
|
||||
override def init() {
|
||||
override def freezeFieldValues {
|
||||
val originalGATK = originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
||||
// Whatever the original function can handle, merging *should* do less.
|
||||
this.memoryLimit = originalFunction.memoryLimit
|
||||
|
||||
// bam_compression and index_output_bam_on_the_fly from SAMFileWriterArgumentTypeDescriptor
|
||||
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
|
||||
|
||||
val compression = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.COMPRESSION_FULLNAME)
|
||||
this.compressionLevel = originalGATK.getFieldValue(compression).asInstanceOf[Option[Int]]
|
||||
|
||||
val indexBam = QFunction.findField(originalFunction.getClass, SAMFileWriterArgumentTypeDescriptor.CREATE_INDEX_FULLNAME)
|
||||
this.createIndex = originalGATK.getFieldValue(indexBam).asInstanceOf[Option[Boolean]]
|
||||
|
||||
super.freezeFieldValues
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,8 @@
|
|||
package org.broadinstitute.sting.queue.extensions.gatk
|
||||
|
||||
import org.broadinstitute.sting.queue.function.scattergather.GatherFunction
|
||||
import org.broadinstitute.sting.queue.function.QFunction
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor
|
||||
|
||||
/**
|
||||
* Merges a vcf text file.
|
||||
|
|
@ -33,7 +35,7 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
|
|||
|
||||
private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
|
||||
|
||||
override def freezeFieldValues = {
|
||||
override def freezeFieldValues {
|
||||
this.memoryLimit = Some(1)
|
||||
|
||||
this.jarFile = this.originalGATK.jarFile
|
||||
|
|
@ -46,6 +48,15 @@ class VcfGatherFunction extends CombineVariants with GatherFunction {
|
|||
this.out = this.originalOutput
|
||||
this.assumeIdenticalSamples = true
|
||||
|
||||
// NO_HEADER and sites_only from VCFWriterArgumentTypeDescriptor
|
||||
// are added by the GATKExtensionsGenerator to the subclass of CommandLineGATK
|
||||
|
||||
val noHeader = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.NO_HEADER_ARG_NAME)
|
||||
this.NO_HEADER = originalGATK.getFieldValue(noHeader).asInstanceOf[Boolean]
|
||||
|
||||
val sitesOnly = QFunction.findField(originalFunction.getClass, VCFWriterArgumentTypeDescriptor.SITES_ONLY_ARG_NAME)
|
||||
this.sites_only = originalGATK.getFieldValue(sitesOnly).asInstanceOf[Boolean]
|
||||
|
||||
super.freezeFieldValues
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,9 +37,10 @@ import net.sf.samtools.SAMFileHeader.SortOrder
|
|||
* some values are optional.
|
||||
*/
|
||||
trait PicardBamFunction extends JavaCommandLineFunction {
|
||||
var validationStringency: ValidationStringency = ValidationStringency.SILENT
|
||||
var sortOrder: SortOrder = SortOrder.coordinate
|
||||
var validationStringency = ValidationStringency.SILENT
|
||||
var sortOrder = SortOrder.coordinate
|
||||
var compressionLevel: Option[Int] = None
|
||||
var createIndex: Option[Boolean] = None
|
||||
var maxRecordsInRam: Option[Int] = None
|
||||
var assumeSorted: Option[Boolean] = None
|
||||
|
||||
|
|
@ -55,5 +56,6 @@ trait PicardBamFunction extends JavaCommandLineFunction {
|
|||
optional(" VALIDATION_STRINGENCY=", validationStringency),
|
||||
optional(" SO=", sortOrder),
|
||||
optional(" MAX_RECORDS_IN_RAM=", maxRecordsInRam),
|
||||
optional(" ASSUME_SORTED=", assumeSorted)).mkString
|
||||
optional(" ASSUME_SORTED=", assumeSorted),
|
||||
optional(" CREATE_INDEX=", createIndex)).mkString
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue