More updates to the CleanBamFile pipeline.
Added a CommandLineFunction.jobDependencies field that explicitly forces a function to wait for a file, even if that file isn't otherwise listed on an @Input. More bug fixes and refactoring of functions. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4048 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
e632d9b83d
commit
618c69f8dc
|
|
@ -1,4 +1,6 @@
|
||||||
|
import org.broadinstitute.sting.queue.extensions.firehose.ImportSingleValueFunction
|
||||||
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
||||||
|
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
|
|
||||||
|
|
@ -8,6 +10,9 @@ class CleanBamFile extends QScript {
|
||||||
@Argument(doc="gatk jar", shortName="gatk")
|
@Argument(doc="gatk jar", shortName="gatk")
|
||||||
var gatkJar: File = _
|
var gatkJar: File = _
|
||||||
|
|
||||||
|
@Argument(doc="samtools binary", shortName="samtools")
|
||||||
|
var samtoolsBinary: String = _
|
||||||
|
|
||||||
@Argument(doc="fix mates jar", shortName="fixMates")
|
@Argument(doc="fix mates jar", shortName="fixMates")
|
||||||
var fixMatesJar: File = _
|
var fixMatesJar: File = _
|
||||||
|
|
||||||
|
|
@ -29,7 +34,7 @@ class CleanBamFile extends QScript {
|
||||||
@Argument(doc="read group blacklist", shortName="RGBL", required=false)
|
@Argument(doc="read group blacklist", shortName="RGBL", required=false)
|
||||||
var readGroupBlackList: String = _
|
var readGroupBlackList: String = _
|
||||||
|
|
||||||
@Argument(doc="intervals", shortName="L", required=false)
|
@Argument(doc="intervals", shortName="L")
|
||||||
var intervals: File = _
|
var intervals: File = _
|
||||||
|
|
||||||
@Argument(doc="Script that can split the interval file by contig, for example Sting/python/splitIntervalsByContig.py.", shortName="RTCSS")
|
@Argument(doc="Script that can split the interval file by contig, for example Sting/python/splitIntervalsByContig.py.", shortName="RTCSS")
|
||||||
|
|
@ -49,6 +54,33 @@ class CleanBamFile extends QScript {
|
||||||
@Input(doc="dbsnp file", shortName="D")
|
@Input(doc="dbsnp file", shortName="D")
|
||||||
var dbsnpFile: File = _
|
var dbsnpFile: File = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose import jar", shortName="importJar")
|
||||||
|
var firehoseImportJar: File = _
|
||||||
|
|
||||||
|
@Argument(doc="short job queue", shortName="shortQueue", required=false)
|
||||||
|
var shortJobQueue: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose host", shortName="FHHost")
|
||||||
|
var firehoseHost: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose port", shortName="FHPort")
|
||||||
|
var firehosePort: Int = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose domain", shortName="FHDom")
|
||||||
|
var firehoseDomain: String = _
|
||||||
|
|
||||||
|
@Argument(doc="clean bam firehose entity type", shortName="bamFHEType")
|
||||||
|
var bamFirehoseEntityType: String = _
|
||||||
|
|
||||||
|
@Argument(doc="clean bam firehose entity id", shortName="bamFHEID")
|
||||||
|
var bamFirehoseEntityID: String = _
|
||||||
|
|
||||||
|
@Argument(doc="clean bam firehose annotation type name", shortName="bamFHAnn")
|
||||||
|
var bamFirehoseAnnotationTypeName: String = _
|
||||||
|
|
||||||
|
@Argument(doc="clean bam firehose security token", shortName="bamFHToken")
|
||||||
|
var bamFirehoseSecurityToken: String = _
|
||||||
|
|
||||||
trait GATKCommonArgs extends CommandLineGATK {
|
trait GATKCommonArgs extends CommandLineGATK {
|
||||||
this.jarFile = qscript.gatkJar
|
this.jarFile = qscript.gatkJar
|
||||||
this.reference_sequence = qscript.referenceFile
|
this.reference_sequence = qscript.referenceFile
|
||||||
|
|
@ -96,7 +128,7 @@ class CleanBamFile extends QScript {
|
||||||
realigner.DBSNP = dbsnpFile
|
realigner.DBSNP = dbsnpFile
|
||||||
realigner.scatterCount = indelRealignerScatterCount
|
realigner.scatterCount = indelRealignerScatterCount
|
||||||
|
|
||||||
val bamIndex = new BamIndexFunction
|
var fixedBam: File = null
|
||||||
|
|
||||||
if (realigner.scatterCount > 1) {
|
if (realigner.scatterCount > 1) {
|
||||||
realigner.output = baseFile(".cleaned.bam")
|
realigner.output = baseFile(".cleaned.bam")
|
||||||
|
|
@ -105,8 +137,12 @@ class CleanBamFile extends QScript {
|
||||||
case (scatter: IntervalScatterFunction, _) =>
|
case (scatter: IntervalScatterFunction, _) =>
|
||||||
scatter.splitIntervalsScript = indelRealignerScatterScript
|
scatter.splitIntervalsScript = indelRealignerScatterScript
|
||||||
}
|
}
|
||||||
|
realigner.gatherClass = {
|
||||||
|
case source if (source.field.getName=="output") =>
|
||||||
|
classOf[BamGatherFunction]
|
||||||
|
}
|
||||||
realigner.setupGatherFunction = {
|
realigner.setupGatherFunction = {
|
||||||
case (gather: PicardBamJarFunction, _) =>
|
case (gather: BamGatherFunction, _) =>
|
||||||
gather.memoryLimit = Some(4)
|
gather.memoryLimit = Some(4)
|
||||||
gather.jarFile = fixMatesJar
|
gather.jarFile = fixMatesJar
|
||||||
// Don't pass this AS=true to fix mates!
|
// Don't pass this AS=true to fix mates!
|
||||||
|
|
@ -115,7 +151,7 @@ class CleanBamFile extends QScript {
|
||||||
gather.mergeTextScript = mergeTextScript
|
gather.mergeTextScript = mergeTextScript
|
||||||
}
|
}
|
||||||
|
|
||||||
bamIndex.bamFile = realigner.output
|
fixedBam = realigner.output
|
||||||
} else {
|
} else {
|
||||||
realigner.output = baseFile(".unfixed.cleaned.bam")
|
realigner.output = baseFile(".unfixed.cleaned.bam")
|
||||||
|
|
||||||
|
|
@ -132,12 +168,30 @@ class CleanBamFile extends QScript {
|
||||||
fixMates.unfixed = realigner.output
|
fixMates.unfixed = realigner.output
|
||||||
fixMates.fixed = baseFile(".cleaned.bam")
|
fixMates.fixed = baseFile(".cleaned.bam")
|
||||||
|
|
||||||
bamIndex.bamFile = fixMates.fixed
|
fixedBam = fixMates.fixed
|
||||||
|
|
||||||
// Add the fix mates explicitly
|
// Add the fix mates explicitly
|
||||||
add(fixMates)
|
add(fixMates)
|
||||||
}
|
}
|
||||||
|
|
||||||
add(targetCreator, realigner, bamIndex)
|
val bamIndex = new SamtoolsIndexFunction
|
||||||
|
bamIndex.samtools = samtoolsBinary
|
||||||
|
bamIndex.bamFile = fixedBam
|
||||||
|
bamIndex.bamFileIndex = swapExt(fixedBam, "bam", "bam.bai")
|
||||||
|
|
||||||
|
val importer = new ImportSingleValueFunction
|
||||||
|
importer.jobQueue = shortJobQueue
|
||||||
|
importer.jarFile = firehoseImportJar
|
||||||
|
importer.host = firehoseHost
|
||||||
|
importer.port = firehosePort
|
||||||
|
importer.domain = firehoseDomain
|
||||||
|
importer.entityType = bamFirehoseEntityType
|
||||||
|
importer.entityID = bamFirehoseEntityID
|
||||||
|
importer.annotationTypeName = bamFirehoseAnnotationTypeName
|
||||||
|
importer.securityToken = bamFirehoseSecurityToken
|
||||||
|
importer.importValue = fixedBam
|
||||||
|
importer.jobDependencies :+= bamIndex.bamFileIndex
|
||||||
|
|
||||||
|
add(targetCreator, realigner, bamIndex, importer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
|
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.apache.commons.io.FilenameUtils;
|
import org.apache.commons.io.FilenameUtils;
|
||||||
|
|
||||||
|
|
@ -44,7 +45,7 @@ def script = {
|
||||||
|
|
||||||
def bai(bam: File) = new File(bam + ".bai")
|
def bai(bam: File) = new File(bam + ".bai")
|
||||||
|
|
||||||
class Index(bamIn: File) extends BamIndexFunction {
|
class Index(bamIn: File) extends SamtoolsIndexFunction {
|
||||||
bamFile = bamIn
|
bamFile = bamIn
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,45 @@
|
||||||
|
package org.broadinstitute.sting.queue.extensions.firehose
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.function.JarCommandLineFunction
|
||||||
|
import org.broadinstitute.sting.commandline.{Input, Argument}
|
||||||
|
import java.io.File
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Runs the Firehose ImportSingleValue jar file.
|
||||||
|
*/
|
||||||
|
class ImportSingleValueFunction extends JarCommandLineFunction {
|
||||||
|
@Argument(doc="firehose host")
|
||||||
|
var host: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose port")
|
||||||
|
var port: Int = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose domain")
|
||||||
|
var domain: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose entity type")
|
||||||
|
var entityType: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose entity id")
|
||||||
|
var entityID: String = _
|
||||||
|
|
||||||
|
@Argument(doc="firehose annotation type name", shortName="bamFHAnn", required=false)
|
||||||
|
var annotationTypeName: String = _
|
||||||
|
|
||||||
|
@Argument(doc="clean bam firehose security token", shortName="bamFHToken", required=false)
|
||||||
|
var securityToken: String = _
|
||||||
|
|
||||||
|
@Input(doc="imports the path to this file", exclusiveOf="importValueInFile")
|
||||||
|
var importValue: File = _
|
||||||
|
|
||||||
|
@Input(doc="imports the value contained in the file", exclusiveOf="importValue")
|
||||||
|
var importValueInFile: File = _
|
||||||
|
|
||||||
|
override def commandLine = super.commandLine + ("" +
|
||||||
|
" PORT=%s HOST=%s DOMAIN=%s ENTITY_TYPE=%s" +
|
||||||
|
" ENTITY_ID=%s ANNOTATION_TYPE_NAME=%s SECURITY_TOKEN=%s" +
|
||||||
|
"%s%s"
|
||||||
|
).format(
|
||||||
|
port, host, domain, entityType, entityID, annotationTypeName, securityToken,
|
||||||
|
optional(" VALUE=", importValue), optional(" VALUE_FILE=", importValueInFile))
|
||||||
|
}
|
||||||
|
|
@ -20,7 +20,7 @@ trait PicardBamJarFunction extends JarCommandLineFunction {
|
||||||
protected def inputBams: List[File]
|
protected def inputBams: List[File]
|
||||||
protected def outputBam: File
|
protected def outputBam: File
|
||||||
|
|
||||||
override def commandLine = super.commandLine + "%s%s%s".format(
|
override def commandLine = super.commandLine + "%s%s%s%s%s%s%s%s".format(
|
||||||
optional(" COMPRESSION_LEVEL=", compressionLevel), optional(" VALIDATION_STRINGENCY=", validationStringency),
|
optional(" COMPRESSION_LEVEL=", compressionLevel), optional(" VALIDATION_STRINGENCY=", validationStringency),
|
||||||
optional(" SO=", sortOrder), optional( " MAX_RECORDS_IN_RAM=", maxRecordsInRam), optional(" ASSUME_SORTED=", assumeSorted),
|
optional(" SO=", sortOrder), optional( " MAX_RECORDS_IN_RAM=", maxRecordsInRam), optional(" ASSUME_SORTED=", assumeSorted),
|
||||||
" OUTPUT=" + outputBam, repeat(" INPUT=", inputBams), " TMP_DIR=" + jobTempDir)
|
" OUTPUT=" + outputBam, repeat(" INPUT=", inputBams), " TMP_DIR=" + jobTempDir)
|
||||||
|
|
|
||||||
|
|
@ -1,18 +1,15 @@
|
||||||
package org.broadinstitute.sting.queue.extensions.gatk
|
package org.broadinstitute.sting.queue.extensions.samtools
|
||||||
|
|
||||||
import org.broadinstitute.sting.queue.function.CommandLineFunction
|
import org.broadinstitute.sting.queue.function.CommandLineFunction
|
||||||
import java.io.File
|
import java.io.File
|
||||||
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
|
import org.broadinstitute.sting.commandline.{Argument, Output, Input}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Indexes a BAM file.
|
* Indexes a BAM file using samtools.
|
||||||
* By default uses samtools index.
|
|
||||||
* The syntax of the script must be:
|
|
||||||
* <bamIndexScript> <bam_file> <bam_index_file>
|
|
||||||
*/
|
*/
|
||||||
class BamIndexFunction extends CommandLineFunction {
|
class SamtoolsIndexFunction extends CommandLineFunction {
|
||||||
@Argument(doc="BAM file script")
|
@Argument(doc="samtools path")
|
||||||
var bamIndexScript: String = "samtools index"
|
var samtools: String = "samtools"
|
||||||
|
|
||||||
@Input(doc="BAM file to index")
|
@Input(doc="BAM file to index")
|
||||||
var bamFile: File = _
|
var bamFile: File = _
|
||||||
|
|
@ -29,7 +26,7 @@ class BamIndexFunction extends CommandLineFunction {
|
||||||
bamFileIndex = new File(bamFile.getPath + ".bai")
|
bamFileIndex = new File(bamFile.getPath + ".bai")
|
||||||
}
|
}
|
||||||
|
|
||||||
def commandLine = "%s %s %s".format(bamIndexScript, bamFile, bamFileIndex)
|
def commandLine = "%s index %s %s".format(samtools, bamFile, bamFileIndex)
|
||||||
|
|
||||||
override def dotString = "Index: %s".format(bamFile.getName)
|
override def dotString = "Index: %s".format(bamFile.getName)
|
||||||
}
|
}
|
||||||
|
|
@ -44,6 +44,10 @@ trait CommandLineFunction extends QFunction with Logging {
|
||||||
/** If true this function will run only if the jobs it is dependent on succeed. */
|
/** If true this function will run only if the jobs it is dependent on succeed. */
|
||||||
var jobRunOnlyIfPreviousSucceed = true
|
var jobRunOnlyIfPreviousSucceed = true
|
||||||
|
|
||||||
|
/** Files that this job should wait on before running. */
|
||||||
|
@Input(doc="Explicit job dependencies", required=false)
|
||||||
|
var jobDependencies: List[File] = Nil
|
||||||
|
|
||||||
/** File to redirect any output. Defaults to <jobName>.out */
|
/** File to redirect any output. Defaults to <jobName>.out */
|
||||||
@Output(doc="File to redirect any output", required=false)
|
@Output(doc="File to redirect any output", required=false)
|
||||||
@Gather(classOf[SimpleTextGatherFunction])
|
@Gather(classOf[SimpleTextGatherFunction])
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue