now merging bam files per sample and processing according to cleaning options.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5477 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
1c95208e26
commit
c9442e4b21
|
|
@ -5,6 +5,9 @@ import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||||
|
|
||||||
|
import net.sf.samtools.{SAMFileReader,SAMFileHeader,SAMReadGroupRecord}
|
||||||
|
import collection.JavaConversions._
|
||||||
|
|
||||||
|
|
||||||
class dataProcessingV2 extends QScript {
|
class dataProcessingV2 extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
@ -62,28 +65,59 @@ class dataProcessingV2 extends QScript {
|
||||||
// Helpful variables
|
// Helpful variables
|
||||||
val outName: String = qscript.outputDir + qscript.projectName
|
val outName: String = qscript.outputDir + qscript.projectName
|
||||||
|
|
||||||
// BAM files generated by the pipeline
|
|
||||||
val joinedBams = new File(outName + ".join.bam")
|
|
||||||
val cleanedBam = new File(outName + ".clean.bam")
|
|
||||||
val dedupedBam = new File(outName + ".clean.dedup.bam")
|
|
||||||
val recalBam = new File(outName + ".clean.dedup.recal.bam")
|
|
||||||
|
|
||||||
// Accessory files
|
|
||||||
val targetIntervals = new File(outName + ".intervals")
|
|
||||||
val metricsFile = new File(outName + ".metrics")
|
|
||||||
val preRecalFile = new File(outName + ".pre_recal.csv")
|
|
||||||
val postRecalFile = new File(outName + ".post_recal.csv")
|
|
||||||
val preOutPath = new File(outName + ".pre")
|
|
||||||
val postOutPath = new File(outName + ".post")
|
|
||||||
|
|
||||||
|
|
||||||
//todo -- process bam headers to compile bamLists of samples.
|
//todo -- process bam headers to compile bamLists of samples.
|
||||||
|
var sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
||||||
|
for (bam <- scala.io.Source.fromFile(input).getLines) {
|
||||||
|
val bamFile = new File(bam)
|
||||||
|
val samReader = new SAMFileReader(bamFile)
|
||||||
|
val header = samReader.getFileHeader()
|
||||||
|
val readGroup = header.getReadGroups()
|
||||||
|
for (rg <- readGroup) {
|
||||||
|
val sample = rg.getSample()
|
||||||
|
if (!sampleTable.contains(sample))
|
||||||
|
sampleTable(sample) = List(bamFile)
|
||||||
|
else if ( !sampleTable(sample).contains(bamFile))
|
||||||
|
sampleTable(sample) :+= bamFile
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||||
|
|
||||||
|
for ((sample, flist) <- sampleTable) {
|
||||||
|
val sampleFileName = new File(outName + "." + sample + ".bam")
|
||||||
|
sampleBamFiles(sample) = sampleFileName
|
||||||
|
add(joinBams(flist, sampleFileName))
|
||||||
|
}
|
||||||
|
|
||||||
add(joinBams(input, joinedBams),
|
println("\nFound the following samples (files created as necessary): ")
|
||||||
target(joinedBams, targetIntervals),
|
for ((sample, file) <- sampleBamFiles)
|
||||||
clean(joinedBams, targetIntervals, cleanedBam),
|
println("\t" + sample + " -> " + file)
|
||||||
|
|
||||||
|
val globalIntervals = new File(outName + ".intervals")
|
||||||
|
if (knownsOnly)
|
||||||
|
add(target(null, globalIntervals))
|
||||||
|
|
||||||
|
for ((sample, bam) <- sampleBamFiles) {
|
||||||
|
|
||||||
|
// BAM files generated by the pipeline
|
||||||
|
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
|
||||||
|
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
|
||||||
|
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
|
||||||
|
|
||||||
|
// Accessory files
|
||||||
|
val targetIntervals = if (knownsOnly) {globalIntervals} else {swapExt(bam, ".bam", ".intervals")}
|
||||||
|
val metricsFile = swapExt(bam, ".bam", ".metrics")
|
||||||
|
val preRecalFile = swapExt(bam, ".bam", ".pre_recal.csv")
|
||||||
|
val postRecalFile = swapExt(bam, ".bam", ".post_recal.csv")
|
||||||
|
val preOutPath = swapExt(bam, ".bam", ".pre")
|
||||||
|
val postOutPath = swapExt(bam, ".bam", ".post")
|
||||||
|
|
||||||
|
if (!knownsOnly)
|
||||||
|
add(target(bam, targetIntervals))
|
||||||
|
|
||||||
|
add(clean(bam, targetIntervals, cleanedBam),
|
||||||
dedup(cleanedBam, dedupedBam, metricsFile),
|
dedup(cleanedBam, dedupedBam, metricsFile),
|
||||||
cov(dedupedBam, preRecalFile),
|
cov(dedupedBam, preRecalFile),
|
||||||
recal(dedupedBam, preRecalFile, recalBam),
|
recal(dedupedBam, preRecalFile, recalBam),
|
||||||
|
|
@ -91,6 +125,7 @@ class dataProcessingV2 extends QScript {
|
||||||
analyzeCovariates(preRecalFile, preOutPath),
|
analyzeCovariates(preRecalFile, preOutPath),
|
||||||
analyzeCovariates(postRecalFile, postOutPath))
|
analyzeCovariates(postRecalFile, postOutPath))
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// General arguments to all programs
|
// General arguments to all programs
|
||||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
trait CommandLineGATKArgs extends CommandLineGATK {
|
||||||
|
|
@ -100,11 +135,11 @@ class dataProcessingV2 extends QScript {
|
||||||
this.isIntermediate = true
|
this.isIntermediate = true
|
||||||
}
|
}
|
||||||
|
|
||||||
case class joinBams (inBams: File, outBam: File) extends PicardBamJarFunction {
|
case class joinBams (inBams: List[File], outBam: File) extends PicardBamJarFunction {
|
||||||
@Input(doc="input bam list") var join = inBams
|
@Input(doc="input bam list") var join = inBams
|
||||||
@Output(doc="joined bam") var joined = outBam
|
@Output(doc="joined bam") var joined = outBam
|
||||||
@Output(doc="joined bam index") var joinedIndex = new File(outBam + "bai")
|
@Output(doc="joined bam index") var joinedIndex = new File(outBam + "bai")
|
||||||
override def inputBams = List(join)
|
override def inputBams = join
|
||||||
override def outputBam = joined
|
override def outputBam = joined
|
||||||
override def commandLine = super.commandLine + " CREATE_INDEX=true"
|
override def commandLine = super.commandLine + " CREATE_INDEX=true"
|
||||||
this.memoryLimit = Some(6)
|
this.memoryLimit = Some(6)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue