142 lines
5.8 KiB
Scala
Executable File
142 lines
5.8 KiB
Scala
Executable File
package org.broadinstitute.sting.queue.pipeline
|
|
|
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
|
import java.io.File
|
|
import org.broadinstitute.sting.queue.extensions.picard.PicardBamFunction
|
|
import org.broadinstitute.sting.queue.function.CommandLineFunction
|
|
import org.broadinstitute.sting.utils.yaml.YamlUtils
|
|
import org.broadinstitute.sting.datasources.pipeline.Pipeline
|
|
import net.sf.picard.reference.ReferenceSequenceFileFactory
|
|
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
|
|
import org.broadinstitute.sting.utils.interval.IntervalUtils
|
|
import collection.mutable.{ListBuffer, HashMap}
|
|
import collection.JavaConversions
|
|
import java.util.Arrays
|
|
import org.broadinstitute.sting.queue.util.{PipelineUtils, IOUtils}
|
|
import org.broadinstitute.sting.commandline.{Output, Input}
|
|
import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction
|
|
|
|
/**
 * Library of standard command-line-function builders for BAM processing
 * (realigner target creation, indel cleaning, mate fixing and indexing),
 * configured from a project [[Pipeline]] description.
 *
 * @param attribs     pipeline description; its project supplies the reference file,
 *                    interval list and dbSNP settings used by the GATK functions
 * @param gatkJar     jar assigned to every GATK function built by this library
 * @param fixMatesJar jar assigned to the fix-mates function created by
 *                    [[StandardIndelCleanBam]]
 */
class BamProcessing(attribs: Pipeline, gatkJar: File, fixMatesJar: File) {
  // Self alias so nested traits/classes can refer to the enclosing instance unambiguously.
  library =>

  /** Pipeline description read by the builders; publicly reassignable, initialised from `attribs`. */
  var attributes : Pipeline = attribs

  /**
   * Alternate constructor that loads the [[Pipeline]] description from a YAML file
   * through `YamlUtils.load`.
   */
  def this(yaml: File, gatkJar: File, fixMatesJar: File) = this(YamlUtils.load(classOf[Pipeline],yaml),gatkJar,fixMatesJar)

  /**
   * Mixin that applies the project-wide defaults to a GATK command line function:
   * reference sequence, project interval list, a "dbsnp" rod binding, a memory limit
   * of 2 and the GATK jar of the enclosing library.
   */
  trait StandardCommandLineGATK extends CommandLineGATK {
    this.reference_sequence = library.attributes.getProject.getReferenceFile
    this.intervals = List(library.attributes.getProject.getIntervalList)
    this.rodBind :+= new RodBind("dbsnp", library.attributes.getProject.getGenotypeDbsnpType, library.attributes.getProject.getGenotypeDbsnp)
    // NOTE(review): unit of memoryLimit is defined by the Queue framework (presumably GB) -- confirm.
    this.memoryLimit = Some(2)
    this.jarFile = library.gatkJar
  }

  /**
   * Creates a standard realigner target creator function for one bam.
   *
   * @param bam     input bam file
   * @param contigs interval strings (contigs) over which to run
   * @param output  file the targets are written to
   * @return the configured realigner target creator function
   */
  def StandardRealignerTargetCreator(bam: File, contigs: List[String], output: File) : RealignerTargetCreator = {
    val rtc = new RealignerTargetCreator with StandardCommandLineGATK
    // Drop the project interval file set by the mixin; the contig strings are used instead.
    rtc.intervals = Nil
    rtc.intervalsString = contigs
    rtc.input_file :+= bam
    rtc.out = output
    rtc.analysisName = "RealignerTargetCreator"
    rtc
  }

  /**
   * Creates a standard indel cleaner function for one bam over a set of contigs.
   *
   * @param bam     input bam file
   * @param contigs interval strings (contigs) over which to run
   * @param targets output of the realigner target creator
   * @param outBam  bam file the cleaned reads are written to
   * @return the configured indel realigner function
   */
  def StandardIndelCleaner(bam: File, contigs: List[String], targets: File, outBam: File) : IndelRealigner = {
    val realigner = new IndelRealigner with StandardCommandLineGATK
    realigner.intervalsString = contigs
    // Drop the project interval file set by the mixin; the contig strings are used instead.
    realigner.intervals = Nil
    realigner.input_file :+= bam
    realigner.out = outBam
    realigner.targetIntervals = targets
    realigner.analysisName = "IndelClean"
    // NOTE(review): compression 0 presumably because these per-contig bams are
    // intermediates that get merged by the fix-mates step -- confirm.
    realigner.bam_compression = Some(0)
    realigner
  }

  /**
   * Creates the split-by-contig indel cleaning jobs for one bam, followed by the
   * fix-mates job that merges the per-group bams into `cleanedBam`.
   *
   * Each contig group gets its own intermediate bam, named after the input bam with
   * the group's contigs (joined by "_") inserted before the ".bam" extension.
   *
   * @param bam        input bam file
   * @param jobContigs one list of contigs per cleaning job
   * @param targets    output of the realigner target creator
   * @param cleanedBam final merged, mate-fixed bam
   * @return one indel cleaner function per contig group, then the fix-mates function
   *         (todo -- wrapped in a Pipeline)
   */
  def StandardIndelCleanBam(bam: File, jobContigs: List[List[String]], targets: File, cleanedBam: File) : List[CommandLineFunction] = {
    val splitBams : List[File] =
      jobContigs.map(ctigs => swapExt(bam, ".bam", ".%s.bam".format(ctigs.mkString("_"))))

    val cleaners : List[CommandLineFunction] =
      jobContigs.zip(splitBams).map { case (ctigs, splitBam) => StandardIndelCleaner(bam, ctigs, targets, splitBam) }

    cleaners :+ StandardPicardFixMates(splitBams, cleanedBam, library.fixMatesJar)
  }

  /**
   * Given (unclean bam, cleaned bam) pairs and a number of jobs, creates the command
   * line functions for the full indel realignment pipeline: target creation, split
   * cleaning, merging with mate fixing, and indexing of the cleaned bam.
   *
   * @param bamsUncleanCleanPairs pairs of (input bam, cleaned bam to write)
   * @param nJobs                 passed to `PipelineUtils.smartSplitContigs` together with
   *                              the project reference and interval list to obtain the
   *                              contig groups
   * @return for each pair, in order: the target creator, the cleaning and fix-mates
   *         functions from [[StandardIndelCleanBam]], and a samtools index function
   */
  def StandardIndelRealign( bamsUncleanCleanPairs: List[(File,File)], nJobs: Int = 1 ) : List[CommandLineFunction] = {
    val contigsForJobs : List[List[String]] = PipelineUtils.smartSplitContigs(library.attributes.getProject.getReferenceFile, library.attributes.getProject.getIntervalList, nJobs)
    // The target creator runs over every contig; this list is the same for all bams,
    // so it is built once rather than once per pair.
    val allContigs : List[String] = contigsForJobs.flatten

    bamsUncleanCleanPairs.flatMap { case (uncleanBam, cleanBam) =>
      val rtc : RealignerTargetCreator = StandardRealignerTargetCreator(uncleanBam, allContigs, swapExt(uncleanBam, ".bam", ".targets"))
      val icbs : List[CommandLineFunction] = StandardIndelCleanBam(uncleanBam, contigsForJobs, rtc.out, cleanBam)

      val sam : SamtoolsIndexFunction = new SamtoolsIndexFunction
      sam.bamFile = cleanBam
      sam.analysisName = "SamtoolsIndex"

      val jobsForBam : List[CommandLineFunction] = (rtc :: icbs) :+ sam
      jobsForBam
    }
  }

  /**
   * Merges N bam files into one bam file, fixing mate pairs in the process; does not
   * assume the inputs are sorted.
   *
   * @param inBams    bam files to merge
   * @param outBam    merged output bam
   * @param picardJar jar assigned to the function
   * @return command line function for the merge, fix-mate, and sort operation
   */
  def StandardPicardFixMates(inBams: List[File], outBam: File, picardJar: File) : CommandLineFunction = {
    val pfm : PicardFixMates = new PicardFixMates
    pfm.bams = inBams
    pfm.outBam = outBam
    pfm.jarFile = picardJar
    pfm.assumeSorted = Some(false)
    // NOTE(review): unit of memoryLimit is defined by the Queue framework (presumably GB) -- confirm.
    pfm.memoryLimit = Some(4)
    pfm.analysisName = "FixMates"
    pfm
  }

  /**
   * Picard bam function taking a list of input bams and a single output bam; the
   * fields are exposed to [[PicardBamFunction]] through `inputBams` / `outputBam`.
   */
  class PicardFixMates extends PicardBamFunction {
    @Input(doc="input bam files") var bams: List[File] = Nil
    // Left null until assigned by the caller (see StandardPicardFixMates).
    @Output(doc="output bam file") var outBam: File = null

    def inputBams: List[File] = bams
    def outputBam: File = outBam
  }

  /**
   * Builds a file name by replacing `oldExtension` at the end of the file's name with
   * `newExtension`.
   *
   * Only `file.getName` is used, so the parent directory of `file` is dropped and the
   * result is a relative path. If the name does not end with `oldExtension`,
   * `newExtension` is simply appended.
   *
   * @param file         file whose name is used
   * @param oldExtension suffix to strip from the name
   * @param newExtension suffix to append
   * @return a new relative File with the swapped extension
   */
  def swapExt(file: File, oldExtension: String, newExtension: String) : File =
    new File(file.getName.stripSuffix(oldExtension) + newExtension)
}
|