Fixed list dependency
Instead of creating a BAM list file, I now build a Scala list dynamically and pass it as a parameter. This way the intermediate BAM files don't get deleted before they should.
This commit is contained in:
parent
219252a566
commit
cd12f7f286
|
|
@ -106,7 +106,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
// Because the realignment only happens after these scripts are executed, in case you are using
|
// Because the realignment only happens after these scripts are executed, in case you are using
|
||||||
// bwa realignment, this function will operate over the original bam files and output over the
|
// bwa realignment, this function will operate over the original bam files and output over the
|
||||||
// (to be realigned) bam files.
|
// (to be realigned) bam files.
|
||||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, File] = {
|
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
|
||||||
|
|
||||||
// Creating a table with SAMPLE information from each input BAM file
|
// Creating a table with SAMPLE information from each input BAM file
|
||||||
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
||||||
|
|
@ -131,24 +131,25 @@ class DataProcessingPipeline extends QScript {
|
||||||
sampleTable(sample) :+= rBam
|
sampleTable(sample) :+= rBam
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return sampleTable.toMap
|
||||||
|
|
||||||
println("\n\n*** INPUT FILES ***\n")
|
// println("\n\n*** INPUT FILES ***\n")
|
||||||
// Creating one file for each sample in the dataset
|
// // Creating one file for each sample in the dataset
|
||||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||||
for ((sample, flist) <- sampleTable) {
|
// for ((sample, flist) <- sampleTable) {
|
||||||
|
//
|
||||||
println(sample + ":")
|
// println(sample + ":")
|
||||||
for (f <- flist)
|
// for (f <- flist)
|
||||||
println (f)
|
// println (f)
|
||||||
println()
|
// println()
|
||||||
|
//
|
||||||
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||||
sampleBamFiles(sample) = sampleFileName
|
// sampleBamFiles(sample) = sampleFileName
|
||||||
add(writeList(flist, sampleFileName))
|
// //add(writeList(flist, sampleFileName))
|
||||||
}
|
// }
|
||||||
println("*** INPUT FILES ***\n\n")
|
// println("*** INPUT FILES ***\n\n")
|
||||||
|
//
|
||||||
return sampleBamFiles.toMap
|
// return sampleBamFiles.toMap
|
||||||
}
|
}
|
||||||
|
|
||||||
// Rebuilds the Read Group string to give BWA
|
// Rebuilds the Read Group string to give BWA
|
||||||
|
|
@ -224,7 +225,10 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
// final output list of processed bam files
|
||||||
|
var cohortList: List[File] = List()
|
||||||
|
|
||||||
|
// sets the model for the Indel Realigner
|
||||||
cleanModelEnum = getIndelCleaningModel()
|
cleanModelEnum = getIndelCleaningModel()
|
||||||
|
|
||||||
// keep a record of the number of contigs in the first bam file in the list
|
// keep a record of the number of contigs in the first bam file in the list
|
||||||
|
|
@ -233,28 +237,19 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
|
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
|
||||||
|
|
||||||
// Generate a BAM file per sample joining all per lane files if necessary
|
// generate a BAM file per sample joining all per lane files if necessary
|
||||||
val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs)
|
val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
|
||||||
|
|
||||||
// Final output list of processed bam files
|
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||||
var cohortList: List[File] = List()
|
|
||||||
|
|
||||||
// Simple progress report
|
|
||||||
println("\nFound the following samples: ")
|
|
||||||
for ((sample, file) <- sampleBAMFiles)
|
|
||||||
println("\t" + sample + " -> " + file)
|
|
||||||
println("\n")
|
|
||||||
|
|
||||||
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
|
||||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||||
if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY)
|
if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||||
add(target(null, globalIntervals))
|
add(target(null, globalIntervals))
|
||||||
|
|
||||||
// Put each sample through the pipeline
|
// put each sample through the pipeline
|
||||||
for ((sample, sampleFile) <- sampleBAMFiles) {
|
for ((sample, bamList) <- sampleBAMFiles) {
|
||||||
val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile}
|
|
||||||
|
|
||||||
// BAM files generated by the pipeline
|
// BAM files generated by the pipeline
|
||||||
|
val bam = new File(qscript.projectName + "." + sample + ".bam")
|
||||||
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
|
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
|
||||||
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
|
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
|
||||||
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
|
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
|
||||||
|
|
@ -272,15 +267,16 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
// Validation is an optional step for the BAM file generated after
|
// Validation is an optional step for the BAM file generated after
|
||||||
// alignment and the final bam file of the pipeline.
|
// alignment and the final bam file of the pipeline.
|
||||||
if (!noValidation && sampleFile.endsWith(".bam")) { // todo -- implement validation for .list BAM files
|
if (!noValidation) {
|
||||||
|
for (sampleFile <- bamList)
|
||||||
add(validate(sampleFile, preValidateLog),
|
add(validate(sampleFile, preValidateLog),
|
||||||
validate(recalBam, postValidateLog))
|
validate(recalBam, postValidateLog))
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||||
add(target(sampleFile, targetIntervals))
|
add(target(bamList, targetIntervals))
|
||||||
|
|
||||||
add(clean(sampleFile, targetIntervals, cleanedBam),
|
add(clean(bamList, targetIntervals, cleanedBam),
|
||||||
dedup(cleanedBam, dedupedBam, metricsFile),
|
dedup(cleanedBam, dedupedBam, metricsFile),
|
||||||
cov(dedupedBam, preRecalFile),
|
cov(dedupedBam, preRecalFile),
|
||||||
recal(dedupedBam, preRecalFile, recalBam),
|
recal(dedupedBam, preRecalFile, recalBam),
|
||||||
|
|
@ -320,9 +316,9 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.maxRecordsInRam = 100000
|
this.maxRecordsInRam = 100000
|
||||||
}
|
}
|
||||||
|
|
||||||
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||||
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
|
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||||
this.input_file :+= inBams
|
this.input_file = inBams
|
||||||
this.out = outIntervals
|
this.out = outIntervals
|
||||||
this.mismatchFraction = 0.0
|
this.mismatchFraction = 0.0
|
||||||
this.known :+= qscript.dbSNP
|
this.known :+= qscript.dbSNP
|
||||||
|
|
@ -333,8 +329,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
this.jobName = queueLogDir + outIntervals + ".target"
|
this.jobName = queueLogDir + outIntervals + ".target"
|
||||||
}
|
}
|
||||||
|
|
||||||
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||||
this.input_file :+= inBams
|
this.input_file = inBams
|
||||||
this.targetIntervals = tIntervals
|
this.targetIntervals = tIntervals
|
||||||
this.out = outBam
|
this.out = outBam
|
||||||
this.known :+= qscript.dbSNP
|
this.known :+= qscript.dbSNP
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue