Added support for PICARD functions to QUEUE after following Khalid's pointers on where to do it. I have added the 6 functions used by the Data Processing Pipeline, but from now on it should be a matter of seconds to copy/paste and create bindings to more functions.

Updated the Data Processing Pipeline to use the new Picard classes and reorganized the pre-processing of the pipeline accordingly.

Will only update the wiki once this change goes live.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6071 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
droazen 2011-06-22 22:56:14 +00:00
parent 658e65d26c
commit 48055d45cb
7 changed files with 343 additions and 105 deletions

View File

@ -1,7 +1,6 @@
package core package core
import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction
import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.queue.function.ListWriterFunction
@ -10,6 +9,7 @@ import net.sf.samtools.{SAMFileReader,SAMReadGroupRecord}
import scala.io.Source._ import scala.io.Source._
import collection.JavaConversions._ import collection.JavaConversions._
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
import org.broadinstitute.sting.queue.extensions.picard._
class DataProcessingPipeline extends QScript { class DataProcessingPipeline extends QScript {
@ -23,26 +23,27 @@ class DataProcessingPipeline extends QScript {
@Input(doc="input BAM file - or list of BAM files", fullName="input", shortName="i", required=true) @Input(doc="input BAM file - or list of BAM files", fullName="input", shortName="i", required=true)
var input: File = _ var input: File = _
@Input(doc="path to GenomeAnalysisTK.jar", fullName="path_to_gatk_jar", shortName="gatk", required=true)
var GATKjar: File = _
@Input(doc="path to AnalyzeCovariates.jar", fullName="path_to_ac_jar", shortName="ac", required=true)
var ACJar: File = _
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true) @Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
var R: String = _ var R: String = _
@Input(doc="path to Picard's MarkDuplicates.jar", fullName="path_to_dedup_jar", shortName="dedup", required=false) @Input(doc="Reference fasta file", fullName="reference", shortName="R", required=true)
var dedupJar: File = new File("/seq/software/picard/current/bin/MarkDuplicates.jar") var reference: File = _
@Input(doc="path to Picard's MergeSamFiles.jar", fullName="path_to_merge_jar", shortName="merge", required=false) // @Input(doc="path to GenomeAnalysisTK.jar", fullName="path_to_gatk_jar", shortName="gatk", required=false)
var mergeBamJar: File = new File("/seq/software/picard/current/bin/MergeSamFiles.jar") // var GATKjar: File = _
//
// @Input(doc="path to AnalyzeCovariates.jar", fullName="path_to_ac_jar", shortName="ac", required=false)
// var ACJar: File = _
//
// @Input(doc="path to Picard's MarkDuplicates.jar", fullName="path_to_dedup_jar", shortName="dedup", required=false)
// var dedupJar: File = _
//
// @Input(doc="path to Picard's MergeSamFiles.jar", fullName="path_to_merge_jar", shortName="merge", required=false)
// var mergeBamJar: File = _
//
// @Input(doc="path to Picard's ValidateSamFile.jar", fullName="path_to_validate_jar", shortName="validate", required=false)
// var validateSamJar: File = _
@Input(doc="path to Picard's ValidateSamFile.jar", fullName="path_to_validate_jar", shortName="validate", required=false)
var validateSamJar: File = new File("/seq/software/picard/current/bin/ValidateSamFile.jar")
@Input(doc="Reference fasta file", fullName="reference", shortName="R", required=false)
var reference: File = new File("/seq/references/Homo_sapiens_assembly19/v1/Homo_sapiens_assembly19.fasta")
@ -51,12 +52,9 @@ class DataProcessingPipeline extends QScript {
****************************************************************************/ ****************************************************************************/
@Input(doc="path to Picard's RevertSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_revert_jar", shortName="revert", required=false) // @Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
var revertSamJar: File = _ // var sortSamJar: File = _
//
@Input(doc="path to Picard's SortSam.jar (if re-aligning a previously processed BAM file)", fullName="path_to_sort_jar", shortName="sort", required=false)
var sortSamJar: File = _
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false) @Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _ var bwaPath: File = _
@ -110,6 +108,14 @@ class DataProcessingPipeline extends QScript {
* Helper classes and methods * Helper classes and methods
****************************************************************************/ ****************************************************************************/
// Immutable holder for the fields of one read group record. Each field is named after the
// two-letter tag it is copied into by addReadGroup: id -> RGID, lb -> RGLB, pl -> RGPL,
// pu -> RGPU, sm -> RGSM, cn -> RGCN, ds -> RGDS.
class ReadGroup(val id: String, val lb: String, val pl: String, val pu: String,
                val sm: String, val cn: String, val ds: String)
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that) // Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = { def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
@ -123,14 +129,13 @@ class DataProcessingPipeline extends QScript {
return false return false
} }
// Utility function to merge all bam files of similar samples. Generates on BAM file per sample. // Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
// It uses the sample information on the header of the input BAM files. // It uses the sample information on the header of the input BAM files.
// //
// Because the realignment only happens after these scripts are executed, in case you are using // Because the realignment only happens after these scripts are executed, in case you are using
// bwa realignment, this function will operate over the original bam files and output over the // bwa realignment, this function will operate over the original bam files and output over the
// (to be realigned) bam files. // (to be realigned) bam files.
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File] = null): Map[String, File] = { def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, File] = {
assert(bamFiles.length == realignedBamFiles.length, "List of orignal files must have the same number of files than the list of realigned bam files: " + bamFiles.length + " / " + realignedBamFiles.length)
// Creating a table with SAMPLE information from each input BAM file // Creating a table with SAMPLE information from each input BAM file
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]] val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
@ -173,41 +178,42 @@ class DataProcessingPipeline extends QScript {
} }
// Rebuilds the Read Group string to give BWA // Rebuilds the Read Group string to give BWA
def buildReadGroupString(samReader: SAMFileReader): List[String] = { def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
val readGroups = samReader.getFileHeader.getReadGroups val readGroups = samReader.getFileHeader.getReadGroups
var l: List[String] = List() var index: Int = readGroups.length
for (rg <- readGroups) { for (rg <- readGroups) {
l :+= "@RG\t" + val intermediateInBam: File = if (index == readGroups.length) { inBam } else { swapExt(outBam, ".bam", index+1 + "-rg.bam") }
SAMReadGroupRecord.READ_GROUP_ID_TAG + ":" + rg.getReadGroupId + "\t" + val intermediateOutBam: File = if (index > 1) {swapExt(outBam, ".bam", index + "-rg.bam") } else { outBam}
SAMReadGroupRecord.PLATFORM_TAG + ":" + rg.getPlatformUnit + "\t" + val readGroup = new ReadGroup(rg.getReadGroupId, rg.getPlatform, rg.getLibrary, rg.getPlatformUnit, rg.getSample, rg.getSequencingCenter, rg.getDescription)
SAMReadGroupRecord.LIBRARY_TAG + ":" + rg.getLibrary + "\t" + add(addReadGroup(intermediateInBam, intermediateOutBam, readGroup))
SAMReadGroupRecord.READ_GROUP_SAMPLE_TAG + ":" + rg.getSample + "\t" + index = index - 1
SAMReadGroupRecord.SEQUENCING_CENTER_TAG + ":" + rg.getSequencingCenter
} }
return l }
}
// Takes a list of processed BAM files and realign them using the BWA option requested (bwase or bwape). // Takes a list of processed BAM files and realign them using the BWA option requested (bwase or bwape).
// Returns a list of realigned BAM files. // Returns a list of realigned BAM files.
def performAlignment(bams: List[File]): List[File] = { def performAlignment(bams: List[File]): List[File] = {
var realignedBams: List[File] = List() var realignedBams: List[File] = List()
var index = 1
for (bam <- bams) { for (bam <- bams) {
val saiFile1 = swapExt(bam, ".bam", "1.sai") val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "2.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", ".realigned.sam") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
val realignedBamFile = swapExt(bam, ".bam", ".realigned.bam") val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam")
val readGroupString = buildReadGroupString(new SAMFileReader(bam)) val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam")
if (useBWAse) { if (useBWAse) {
add(bwa_aln_se(bam, saiFile1), add(bwa_aln_se(bam, saiFile1),
bwa_sam_se(bam, saiFile1, realignedSamFile, readGroupString)) bwa_sam_se(bam, saiFile1, realignedSamFile))
} }
else { else {
add(bwa_aln_pe(bam, saiFile1, 1), add(bwa_aln_pe(bam, saiFile1, 1),
bwa_aln_pe(bam, saiFile2, 2), bwa_aln_pe(bam, saiFile2, 2),
bwa_sam_pe(bam, saiFile1, saiFile2, realignedSamFile, readGroupString)) bwa_sam_pe(bam, saiFile1, saiFile2, realignedSamFile))
} }
add(sortSam(realignedSamFile, realignedBamFile)) add(sortSam(realignedSamFile, realignedBamFile))
realignedBams :+= realignedBamFile addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam))
realignedBams :+= rgRealignedBamFile
index = index + 1
} }
return realignedBams return realignedBams
} }
@ -299,13 +305,12 @@ class DataProcessingPipeline extends QScript {
/**************************************************************************** /****************************************************************************
* Classes (Walkers and non-GATK programs) * Classes (GATK Walkers)
****************************************************************************/ ****************************************************************************/
// General arguments to GATK walkers // General arguments to GATK walkers
trait CommandLineGATKArgs extends CommandLineGATK { trait CommandLineGATKArgs extends CommandLineGATK {
this.jarFile = qscript.GATKjar
this.reference_sequence = qscript.reference this.reference_sequence = qscript.reference
this.memoryLimit = 4 this.memoryLimit = 4
this.isIntermediate = true this.isIntermediate = true
@ -364,10 +369,12 @@ class DataProcessingPipeline extends QScript {
// Outside tools (not GATK walkers) /****************************************************************************
* Classes (non-GATK programs)
****************************************************************************/
case class analyzeCovariates (inRecalFile: File, outPath: File) extends AnalyzeCovariates { case class analyzeCovariates (inRecalFile: File, outPath: File) extends AnalyzeCovariates {
this.jarFile = qscript.ACJar
this.resources = qscript.R this.resources = qscript.R
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.output_dir = outPath.toString this.output_dir = outPath.toString
@ -375,73 +382,56 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + inRecalFile + ".analyze_covariates" this.jobName = queueLogDir + inRecalFile + ".analyze_covariates"
} }
case class dedup (inBam: File, outBam: File, metricsFile: File) extends PicardBamFunction { case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates {
@Input(doc="fixed bam") var clean = inBam this.input = List(inBam)
@Output(doc="deduped bam") var deduped = outBam this.output = outBam
@Output(doc="deduped bam index") var dedupedIndex = new File(outBam + "bai") this.metrics = metricsFile
@Output(doc="metrics file") var metrics = metricsFile
override def inputBams = List(clean)
override def outputBam = deduped
override def commandLine = super.commandLine + " M=" + metricsFile
this.sortOrder = null
this.createIndex = true
this.memoryLimit = 6 this.memoryLimit = 6
this.isIntermediate = true this.isIntermediate = true
this.jarFile = qscript.dedupJar
this.analysisName = queueLogDir + outBam + ".dedup" this.analysisName = queueLogDir + outBam + ".dedup"
this.jobName = queueLogDir + outBam + ".dedup" this.jobName = queueLogDir + outBam + ".dedup"
} }
case class joinBams (inBams: List[File], outBam: File) extends PicardBamFunction { case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles {
@Input(doc="input bam list") var join = inBams this.input = inBams
@Output(doc="joined bam") var joined = outBam this.output = outBam
@Output(doc="joined bam index") var joinedIndex = new File(outBam + "bai")
override def inputBams = join
override def outputBam = joined
override def commandLine = super.commandLine + " CREATE_INDEX=true"
this.jarFile = qscript.mergeBamJar
this.isIntermediate = true this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".joinBams" this.analysisName = queueLogDir + outBam + ".joinBams"
this.jobName = queueLogDir + outBam + ".joinBams" this.jobName = queueLogDir + outBam + ".joinBams"
} }
case class sortSam (inSam: File, outBam: File) extends PicardBamFunction { case class sortSam (inSam: File, outBam: File) extends SortSam {
@Input(doc="input unsorted sam file") var sam = inSam this.input = List(inSam)
@Output(doc="sorted bam") var bam = outBam this.output = outBam
@Output(doc="sorted bam index") var bamIndex = new File(outBam + "bai")
override def inputBams = List(sam)
override def outputBam = bam
override def commandLine = super.commandLine + " CREATE_INDEX=true"
this.jarFile = qscript.sortSamJar
this.isIntermediate = true this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".sortSam" this.analysisName = queueLogDir + outBam + ".sortSam"
this.jobName = queueLogDir + outBam + ".sortSam" this.jobName = queueLogDir + outBam + ".sortSam"
} }
case class validate (inBam: File, outLog: File) extends PicardBamFunction { case class validate (inBam: File, outLog: File) extends ValidateSamFile {
@Input(doc="input bam list") var toValidate = inBam this.input = List(inBam)
@Output(doc="validation log") var validate = outLog this.output = outLog
override def inputBams = List(inBam) this.maxRecordsInRam = 100000
override def outputBam = outLog this.REFERENCE_SEQUENCE = qscript.reference
override def commandLine = super.commandLine + " VALIDATE_INDEX=true MAX_RECORDS_IN_RAM=100000 MODE=SUMMARY REFERENCE_SEQUENCE=" + qscript.reference
sortOrder = null
this.jarFile = qscript.validateSamJar
this.isIntermediate = false this.isIntermediate = false
this.analysisName = queueLogDir + outLog + ".validate" this.analysisName = queueLogDir + outLog + ".validate"
this.jobName = queueLogDir + outLog + ".validate" this.jobName = queueLogDir + outLog + ".validate"
} }
case class revert (inBam: File, outBam: File) extends PicardBamFunction {
@Input(doc="old annotated bam") var oldBam = inBam case class addReadGroup (inBam: File, outBam: File, readGroup: ReadGroup) extends AddOrReplaceReadGroups {
@Output(doc="reverted bam") var revertedBam = outBam this.input = List(inBam)
@Output(doc="reverted bam index") var revertedBamIndex = new File(outBam + ".bai") this.output = outBam
override def inputBams = List(oldBam) this.RGID = readGroup.id
override def outputBam = revertedBam this.RGCN = readGroup.cn
override def commandLine = super.commandLine + " CREATE_INDEX=true" this.RGDS = readGroup.ds
this.RGLB = readGroup.lb
this.RGPL = readGroup.pl
this.RGPU = readGroup.pu
this.RGSM = readGroup.sm
this.isIntermediate = true this.isIntermediate = true
this.jarFile = qscript.dedupJar this.analysisName = queueLogDir + outBam + ".rg"
this.analysisName = queueLogDir + outBam + ".dedup" this.jobName = queueLogDir + outBam + ".rg"
this.jobName = queueLogDir + outBam + ".dedup"
} }
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction {
@ -462,30 +452,22 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1"
} }
case class bwa_sam_se (inBam: File, inSai: File, outBam: File, readGroup: List[String]) extends CommandLineFunction { case class bwa_sam_se (inBam: File, inSai: File, outBam: File) extends CommandLineFunction {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file") var sai = inSai @Input(doc="bwa alignment index file") var sai = inSai
@Output(doc="output aligned bam file") var alignedBam = outBam @Output(doc="output aligned bam file") var alignedBam = outBam
var readGroupParameters = "" def commandLine = bwaPath + " samse " + reference + " " + sai + " " + bam + " > " + alignedBam
for (rg <- readGroup) {
readGroupParameters += " -r \'" + rg + "\'"
}
def commandLine = bwaPath + " samse " + readGroupParameters + " " + reference + " " + sai + " " + bam + " > " + alignedBam
this.isIntermediate = true this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".bwa_sam_se" this.analysisName = queueLogDir + outBam + ".bwa_sam_se"
this.jobName = queueLogDir + outBam + ".bwa_sam_se" this.jobName = queueLogDir + outBam + ".bwa_sam_se"
} }
case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File, readGroup: List[String]) extends CommandLineFunction { case class bwa_sam_pe (inBam: File, inSai1: File, inSai2:File, outBam: File) extends CommandLineFunction {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1 @Input(doc="bwa alignment index file for 1st mating pair") var sai1 = inSai1
@Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2 @Input(doc="bwa alignment index file for 2nd mating pair") var sai2 = inSai2
@Output(doc="output aligned bam file") var alignedBam = outBam @Output(doc="output aligned bam file") var alignedBam = outBam
var readGroupParameters = "" def commandLine = bwaPath + " sampe " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam
for (rg <- readGroup) {
readGroupParameters += " -r \'" + rg + "\'"
}
def commandLine = bwaPath + " sampe " + readGroupParameters + " " + reference + " " + sai1 + " " + sai2 + " " + bam + " " + bam + " > " + alignedBam
this.isIntermediate = true this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".bwa_sam_pe" this.analysisName = queueLogDir + outBam + ".bwa_sam_pe"
this.jobName = queueLogDir + outBam + ".bwa_sam_pe" this.jobName = queueLogDir + outBam + ".bwa_sam_pe"

View File

@ -0,0 +1,59 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
/**
 * Queue binding for Picard's AddOrReplaceReadGroups (net.sf.picard.sam.AddOrReplaceReadGroups).
 *
 * The read group fields are appended to the command line built by PicardBamFunction as
 * RGxx=value pairs. RGCN and RGDS are only emitted when they are non-null and non-empty.
 */
class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
  analysisName = "AddOrReplaceReadGroups"
  javaMainClass = "net.sf.picard.sam.AddOrReplaceReadGroups"

  @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
  var input: List[File] = _

  @Output(doc="The output BAM file with the modified/added read groups", shortName = "output", fullName = "output_bam_file", required = true)
  var output: File = _

  // Deliberately left unset. This initializer runs while `output` is still null, so deriving the
  // path here (new File(output + ".bai")) always produced the bogus file "null.bai" for every
  // instance. Callers that want the index tracked must assign it after setting `output`.
  @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
  var outputIndex: File = _

  @Argument(doc="Read group ID", shortName = "id", fullName = "read_group_id", required = true)
  var RGID: String = _

  @Argument(doc = "Read group library", shortName = "lb", fullName = "read_group_library", required = true)
  var RGLB: String = _

  @Argument(doc = "Read group platform (e.g. illumina, solid)", shortName = "pl", fullName ="read_group_platform", required=true)
  var RGPL: String = _

  @Argument(doc = "Read group platform unit (e.g. run barcode)", shortName = "pu", fullName = "read_group_platform_unit", required = true)
  var RGPU: String = _

  @Argument(doc = "Read group sample name", shortName = "sm", fullName = "read_group_sample_name", required = true)
  var RGSM: String = _

  @Argument(doc = "Read group center name", shortName = "cn", fullName = "read_group_center_name", required = false)
  var RGCN: String = ""

  @Argument(doc = "Read group description", shortName = "ds", fullName = "read_group_description", required = false)
  var RGDS: String = ""

  override def inputBams = input
  override def outputBam = output
  this.createIndex = Some(true)

  /**
   * Wraps a value in single quotes so that whitespace or shell metacharacters inside it
   * (common in descriptions and center names) stay part of a single argument.
   * Embedded single quotes are closed, escaped and reopened. A null value is rendered as the
   * text "null", which is what plain string concatenation produced before.
   */
  private def shellQuote(value: String): String =
    "'" + ("" + value).replace("'", "'\\''") + "'"

  override def commandLine = super.commandLine +
    " RGID=" + shellQuote(RGID) +
    " RGLB=" + shellQuote(RGLB) +
    " RGPL=" + shellQuote(RGPL) +
    " RGPU=" + shellQuote(RGPU) +
    " RGSM=" + shellQuote(RGSM) +
    conditionalParameter(RGCN != null && !RGCN.isEmpty, " RGCN=" + shellQuote(RGCN)) +
    conditionalParameter(RGDS != null && !RGDS.isEmpty, " RGDS=" + shellQuote(RGDS))
}

View File

@ -0,0 +1,49 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
/**
 * Queue binding for Picard's MarkDuplicates (net.sf.picard.sam.MarkDuplicates).
 *
 * Appends the metrics file and the optional tuning parameters to the command line built by
 * PicardBamFunction. The numeric options use -1 as "not set" and are only emitted when positive.
 */
class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
  analysisName = "MarkDuplicates"
  javaMainClass = "net.sf.picard.sam.MarkDuplicates"

  @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
  var input: List[File] = _

  @Output(doc="The output file to write marked records to", shortName = "output", fullName = "output_bam_file", required = true)
  var output: File = _

  // Deliberately left unset. This initializer runs while `output` is still null, so deriving the
  // path here (new File(output + ".bai")) always produced the bogus file "null.bai".
  // Callers that want the index tracked must assign it after setting `output`.
  @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
  var outputIndex: File = _

  // Same initialization-order problem as outputIndex: a default computed here was always
  // "null.metrics". When left unset, commandLine falls back to <output>.metrics instead.
  // NOTE(review): in that fallback case the metrics file is written but not declared as an
  // @Output of this function; set `metrics` explicitly if it must be tracked.
  @Output(doc="File to write duplication metrics to", shortName = "out_metrics", fullName = "output_metrics_file", required = false)
  var metrics: File = _

  @Argument(doc="If true do not write duplicates to the output file instead of writing them with appropriate flags set.", shortName = "remdup", fullName = "remove_duplicates", required = false)
  var REMOVE_DUPLICATES: Boolean = false

  @Argument(doc = "Maximum number of file handles to keep open when spilling read ends to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_file_handles", fullName ="max_file_handles_for_read_ends_maps", required=false)
  var MAX_FILE_HANDLES_FOR_READ_ENDS_MAP: Int = -1

  @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false)
  var SORTING_COLLECTION_SIZE_RATIO: Double = -1

  override def inputBams = input
  override def outputBam = output
  this.sortOrder = null
  this.createIndex = Some(true)

  override def commandLine = super.commandLine +
    // Resolved here, when `output` has been assigned, rather than at construction time.
    " M=" + (if (metrics != null) metrics else new File(output + ".metrics")) +
    conditionalParameter(REMOVE_DUPLICATES, " REMOVE_DUPLICATES=true") +
    conditionalParameter(MAX_FILE_HANDLES_FOR_READ_ENDS_MAP > 0, " MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=" + MAX_FILE_HANDLES_FOR_READ_ENDS_MAP.toString) +
    conditionalParameter(SORTING_COLLECTION_SIZE_RATIO > 0, " SORTING_COLLECTION_SIZE_RATIO=" + SORTING_COLLECTION_SIZE_RATIO.toString)
}

View File

@ -0,0 +1,44 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
/**
 * Queue binding for Picard's MergeSamFiles (net.sf.picard.sam.MergeSamFiles).
 *
 * Appends the optional merge flags and header comment to the command line built by
 * PicardBamFunction. Boolean flags are only emitted when true; COMMENT only when non-empty.
 */
class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
  analysisName = "MergeSamFiles"
  javaMainClass = "net.sf.picard.sam.MergeSamFiles"

  @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
  var input: List[File] = _

  @Output(doc="The output merged BAM file", shortName = "output", fullName = "output_bam_file", required = true)
  var output: File = _

  // Deliberately left unset. This initializer runs while `output` is still null, so deriving the
  // path here (new File(output + ".bai")) always produced the bogus file "null.bai".
  // Callers that want the index tracked must assign it after setting `output`.
  @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
  var outputIndex: File = _

  @Argument(doc="Merge the seqeunce dictionaries Default value: false. This option can be set to 'null' to clear the default value.", shortName = "merge_dict", fullName = "merge_sequence_dictionaries", required = false)
  var MERGE_SEQUENCE_DICTIONARIES: Boolean = false

  @Argument(doc = "Option to enable a simple two-thread producer consumer version of the merge algorithm that uses one thread to read and merge the records from the input files and another thread to encode, compress and write to disk the output file. The threaded version uses about 20% more CPU and decreases runtime by ~20% when writing out a compressed BAM file. ", shortName = "thread", fullName ="use_threading", required=false)
  var USE_THREADING: Boolean = false

  @Argument(doc = "Comments to include in the merged output file's header.", shortName = "com", fullName = "comments", required = false)
  var COMMENT: String = ""

  override def inputBams = input
  override def outputBam = output
  this.createIndex = Some(true)

  /**
   * Wraps a value in single quotes so that a free-text comment containing spaces or shell
   * metacharacters is passed as a single argument. Embedded single quotes are closed,
   * escaped and reopened.
   */
  private def shellQuote(value: String): String =
    "'" + ("" + value).replace("'", "'\\''") + "'"

  override def commandLine = super.commandLine +
    conditionalParameter(MERGE_SEQUENCE_DICTIONARIES, " MERGE_SEQUENCE_DICTIONARIES=true") +
    conditionalParameter(USE_THREADING, " USE_THREADING=true") +
    conditionalParameter(COMMENT != null && !COMMENT.isEmpty, " COMMENT=" + shellQuote(COMMENT))
}

View File

@ -0,0 +1,30 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
/**
 * Queue binding for Picard's SortSam (net.sf.picard.sam.SortSam).
 *
 * Adds no tool-specific parameters: the command line is exactly the one built by
 * PicardBamFunction, with index creation switched on.
 */
class SortSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
  analysisName = "SortSam"
  javaMainClass = "net.sf.picard.sam.SortSam"

  @Input(doc="The input SAM or BAM files to sort.", shortName = "input", fullName = "input_bam_files", required = true)
  var input: List[File] = _

  @Output(doc="The sorted BAM or SAM output file.", shortName = "output", fullName = "output_bam_file", required = true)
  var output: File = _

  // Deliberately left unset. This initializer runs while `output` is still null, so deriving the
  // path here (new File(output + ".bai")) always produced the bogus file "null.bai".
  // Callers that want the index tracked must assign it after setting `output`.
  @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
  var outputIndex: File = _

  override def inputBams = input
  override def outputBam = output
  this.createIndex = Some(true)

  override def commandLine = super.commandLine
}

View File

@ -0,0 +1,60 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import net.sf.picard.sam.ValidateSamFile.Mode
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
/**
 * Queue binding for Picard's ValidateSamFile (net.sf.picard.sam.ValidateSamFile).
 *
 * MODE, MAX_OUTPUT and MAX_OPEN_TEMP_FILES are always written to the command line. The
 * remaining options are only written when they differ from the defaults declared here.
 */
class ValidateSamFile extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
  analysisName = "ValidateSamFile"
  javaMainClass = "net.sf.picard.sam.ValidateSamFile"

  @Input(doc="The input SAM or BAM files to analyze. Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
  var input: List[File] = _

  @Output(doc="Send output to a file instead of stdout", shortName = "output", fullName = "output_file", required = false)
  var output: File = _

  @Argument(doc="Mode of output", shortName = "mode", fullName = "mode_of_output", required = false)
  var MODE: Mode = Mode.VERBOSE

  @Argument(doc="List of validation error types to ignore.", shortName = "ignore", fullName = "ignore_error_types", required = false)
  var IGNORE: List[String] = _

  @Argument(doc = "The maximum number of lines output in verbose mode.", shortName = "max", fullName = "max_output", required = false)
  var MAX_OUTPUT: Int = 100

  @Argument(doc = "Reference sequence file, the NM tag check will be skipped if this is missing.", shortName = "ref", fullName ="reference_sequence", required=false)
  var REFERENCE_SEQUENCE: File = _

  @Argument(doc = "If true, only report errors, and ignore warnings.", shortName = "iw", fullName = "ignore_warnings", required = false)
  var IGNORE_WARNINGS: Boolean = false

  @Argument(doc = "If true and input is a BAM file with an index file, also validates the index.", shortName = "vi", fullName = "validate_index", required = false)
  var VALIDATE_INDEX: Boolean = true

  @Argument(doc = "Whether the SAM or BAM file consists of bisulfite sequenced reads. If so, C->T is not counted as an error in computing the value of the NM tag.", shortName = "bs", fullName = "is_bisulfite_sequenced", required = false)
  var IS_BISULFITE_SEQUENCED: Boolean = false

  @Argument(doc = "Relevant for a coordinate-sorted file containing read pairs only. Maximum number of file handles to keep open when spilling mate info to disk. Set this number a little lower than the per-process maximum number of file that may be open. This number can be found by executing the 'ulimit -n' command on a Unix system.", shortName = "max_files", fullName = "max_open_temp_files", required = false)
  var MAX_OPEN_TEMP_FILES: Int = 8000

  this.sortOrder = null
  override def inputBams = input
  override def outputBam = output

  override def commandLine = super.commandLine +
    " MODE=" + MODE +
    " MAX_OUTPUT=" + MAX_OUTPUT +
    " MAX_OPEN_TEMP_FILES=" + MAX_OPEN_TEMP_FILES +
    // Previously the reference was accepted as a field but never passed on, so a caller that
    // set it still got a run without the reference.
    conditionalParameter(REFERENCE_SEQUENCE != null, " REFERENCE_SEQUENCE=" + REFERENCE_SEQUENCE) +
    conditionalParameter(!VALIDATE_INDEX, " VALIDATE_INDEX=false") +
    conditionalParameter(IGNORE_WARNINGS, " IGNORE_WARNINGS=true") +
    conditionalParameter(IS_BISULFITE_SEQUENCED, " IS_BISULFITE_SEQUENCED=true") +
    // The arguments are evaluated eagerly, so repeat() is called even when IGNORE is null.
    conditionalParameter(IGNORE != null && !IGNORE.isEmpty, repeat(" IGNORE=", IGNORE))
}

View File

@ -71,7 +71,10 @@ trait CommandLineFunction extends QFunction with Logging {
*/ */
protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "", protected def repeat(prefix: String, params: Traversable[_], suffix: String = "", separator: String = "",
format: (String, Any, String) => String = formatValue("%s")) = format: (String, Any, String) => String = formatValue("%s")) =
params.filter(param => hasValue(param)).map(param => format(prefix, param, suffix)).mkString(separator) if (params == null)
""
else
params.filter(param => hasValue(param)).map(param => format(prefix, param, suffix)).mkString(separator)
/** /**
* Returns parameter with a prefix/suffix if it is set otherwise returns "". * Returns parameter with a prefix/suffix if it is set otherwise returns "".
@ -103,4 +106,15 @@ trait CommandLineFunction extends QFunction with Logging {
case x => format.format(x) case x => format.format(x)
}) + suffix }) + suffix
/**
 * Selects between a command line fragment and nothing, so that optional parameters can be
 * chained with `+` inside one long command line expression.
 * Note that `param` is an ordinary (eagerly evaluated) argument: it is computed by the caller
 * even when `condition` is false.
 * @param condition whether the fragment should be emitted
 * @param param the fragment to emit, including any leading separator
 * @return param when condition holds, the empty string otherwise
 */
protected def conditionalParameter(condition: Boolean, param: String): String =
  if (condition) param else ""
} }