Fixed list dependency
Instead of creating a BAM list file, I now dynamically build a Scala list of BAM files and pass it as a parameter. This way the intermediate BAM files don't get deleted before they should.
This commit is contained in:
parent
219252a566
commit
cd12f7f286
|
|
@ -106,7 +106,7 @@ class DataProcessingPipeline extends QScript {
|
|||
// Because the realignment only happens after these scripts are executed, in case you are using
|
||||
// bwa realignment, this function will operate over the original bam files and output over the
|
||||
// (to be realigned) bam files.
|
||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, File] = {
|
||||
def createSampleFiles(bamFiles: List[File], realignedBamFiles: List[File]): Map[String, List[File]] = {
|
||||
|
||||
// Creating a table with SAMPLE information from each input BAM file
|
||||
val sampleTable = scala.collection.mutable.Map.empty[String, List[File]]
|
||||
|
|
@ -131,24 +131,25 @@ class DataProcessingPipeline extends QScript {
|
|||
sampleTable(sample) :+= rBam
|
||||
}
|
||||
}
|
||||
return sampleTable.toMap
|
||||
|
||||
println("\n\n*** INPUT FILES ***\n")
|
||||
// Creating one file for each sample in the dataset
|
||||
val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
for ((sample, flist) <- sampleTable) {
|
||||
|
||||
println(sample + ":")
|
||||
for (f <- flist)
|
||||
println (f)
|
||||
println()
|
||||
|
||||
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
sampleBamFiles(sample) = sampleFileName
|
||||
add(writeList(flist, sampleFileName))
|
||||
}
|
||||
println("*** INPUT FILES ***\n\n")
|
||||
|
||||
return sampleBamFiles.toMap
|
||||
// println("\n\n*** INPUT FILES ***\n")
|
||||
// // Creating one file for each sample in the dataset
|
||||
// val sampleBamFiles = scala.collection.mutable.Map.empty[String, File]
|
||||
// for ((sample, flist) <- sampleTable) {
|
||||
//
|
||||
// println(sample + ":")
|
||||
// for (f <- flist)
|
||||
// println (f)
|
||||
// println()
|
||||
//
|
||||
// val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
|
||||
// sampleBamFiles(sample) = sampleFileName
|
||||
// //add(writeList(flist, sampleFileName))
|
||||
// }
|
||||
// println("*** INPUT FILES ***\n\n")
|
||||
//
|
||||
// return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
|
|
@ -224,7 +225,10 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
|
||||
def script = {
|
||||
// final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
|
||||
// sets the model for the Indel Realigner
|
||||
cleanModelEnum = getIndelCleaningModel()
|
||||
|
||||
// keep a record of the number of contigs in the first bam file in the list
|
||||
|
|
@ -233,28 +237,19 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
|
||||
|
||||
// Generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs)
|
||||
// generate a BAM file per sample joining all per lane files if necessary
|
||||
val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
|
||||
|
||||
// Final output list of processed bam files
|
||||
var cohortList: List[File] = List()
|
||||
|
||||
// Simple progress report
|
||||
println("\nFound the following samples: ")
|
||||
for ((sample, file) <- sampleBAMFiles)
|
||||
println("\t" + sample + " -> " + file)
|
||||
println("\n")
|
||||
|
||||
// If this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
// if this is a 'knowns only' indel realignment run, do it only once for all samples.
|
||||
val globalIntervals = new File(outputDir + projectName + ".intervals")
|
||||
if (cleaningModel == ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
add(target(null, globalIntervals))
|
||||
|
||||
// Put each sample through the pipeline
|
||||
for ((sample, sampleFile) <- sampleBAMFiles) {
|
||||
val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile}
|
||||
// put each sample through the pipeline
|
||||
for ((sample, bamList) <- sampleBAMFiles) {
|
||||
|
||||
// BAM files generated by the pipeline
|
||||
val bam = new File(qscript.projectName + "." + sample + ".bam")
|
||||
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
|
||||
val dedupedBam = swapExt(bam, ".bam", ".clean.dedup.bam")
|
||||
val recalBam = swapExt(bam, ".bam", ".clean.dedup.recal.bam")
|
||||
|
|
@ -272,15 +267,16 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
// Validation is an optional step for the BAM file generated after
|
||||
// alignment and the final bam file of the pipeline.
|
||||
if (!noValidation && sampleFile.endsWith(".bam")) { // todo -- implement validation for .list BAM files
|
||||
if (!noValidation) {
|
||||
for (sampleFile <- bamList)
|
||||
add(validate(sampleFile, preValidateLog),
|
||||
validate(recalBam, postValidateLog))
|
||||
}
|
||||
|
||||
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
add(target(sampleFile, targetIntervals))
|
||||
add(target(bamList, targetIntervals))
|
||||
|
||||
add(clean(sampleFile, targetIntervals, cleanedBam),
|
||||
add(clean(bamList, targetIntervals, cleanedBam),
|
||||
dedup(cleanedBam, dedupedBam, metricsFile),
|
||||
cov(dedupedBam, preRecalFile),
|
||||
recal(dedupedBam, preRecalFile, recalBam),
|
||||
|
|
@ -320,9 +316,9 @@ class DataProcessingPipeline extends QScript {
|
|||
this.maxRecordsInRam = 100000
|
||||
}
|
||||
|
||||
case class target (inBams: File, outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
case class target (inBams: List[File], outIntervals: File) extends RealignerTargetCreator with CommandLineGATKArgs {
|
||||
if (cleanModelEnum != ConsensusDeterminationModel.KNOWNS_ONLY)
|
||||
this.input_file :+= inBams
|
||||
this.input_file = inBams
|
||||
this.out = outIntervals
|
||||
this.mismatchFraction = 0.0
|
||||
this.known :+= qscript.dbSNP
|
||||
|
|
@ -333,8 +329,8 @@ class DataProcessingPipeline extends QScript {
|
|||
this.jobName = queueLogDir + outIntervals + ".target"
|
||||
}
|
||||
|
||||
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
this.input_file :+= inBams
|
||||
case class clean (inBams: List[File], tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
|
||||
this.input_file = inBams
|
||||
this.targetIntervals = tIntervals
|
||||
this.out = outBam
|
||||
this.known :+= qscript.dbSNP
|
||||
|
|
|
|||
Loading…
Reference in New Issue