From 04d8bcaf1941226842e6a5beab179acdfaeaf992 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Mon, 22 Aug 2011 14:42:29 -0400 Subject: [PATCH] Fixed bai removal on picard tools BAM index files were not being deleted because picard replaces the name of the file with bai instead of appending to it. --- .../qscripts/DataProcessingPipeline.scala | 62 +++++++++++++------ .../picard/AddOrReplaceReadGroups.scala | 8 ++- .../extensions/picard/MarkDuplicates.scala | 9 ++- .../extensions/picard/MergeSamFiles.scala | 10 ++- .../queue/extensions/picard/RevertSam.scala | 21 +++++-- .../queue/extensions/picard/SortSam.scala | 11 +++- 6 files changed, 91 insertions(+), 30 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 03d1d7766..38109dd9b 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -142,9 +142,9 @@ class DataProcessingPipeline extends QScript { println (f) println() - val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam") + val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list") sampleBamFiles(sample) = sampleFileName - add(joinBams(flist, sampleFileName)) + add(writeList(flist, sampleFileName)) } println("*** INPUT FILES ***\n\n") @@ -170,18 +170,20 @@ class DataProcessingPipeline extends QScript { var realignedBams: List[File] = List() var index = 1 for (bam <- bams) { - val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" ) + // first revert the BAM file to the original qualities + val revertedBAM = revertBAM(bam) + val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" ) val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") if (useBWAse) { - add(bwa_aln_se(bam, saiFile1), - bwa_sam_se(bam, saiFile1, realignedSamFile)) + add(bwa_aln_se(revertedBAM, saiFile1), + bwa_sam_se(revertedBAM, saiFile1, realignedSamFile)) } else { - add(sortSam(bam, readSortedBam, SortOrder.queryname), + add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname), bwa_aln_pe(readSortedBam, saiFile1, 1), bwa_aln_pe(readSortedBam, saiFile2, 2), bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile)) @@ -203,6 +205,19 @@ class DataProcessingPipeline extends QScript { ConsensusDeterminationModel.USE_READS } + def revertBams(bams: List[File]): List[File] = { + var revertedBAMList: List[File] = List() + for (bam <- bams) + revertedBAMList :+= revertBAM(bam) + return revertedBAMList + } + + def revertBAM(bam: File): File = { + val revertedBAM = swapExt(bam, ".bam", ".reverted.bam") + add(revert(bam, revertedBAM)) + return revertedBAM + } + /**************************************************************************** * Main script @@ -217,17 +232,17 @@ class DataProcessingPipeline extends QScript { val bams = QScriptUtils.createListFromFile(input) nContigs = QScriptUtils.getNumberOfContigs(bams(0)) - val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams} + val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)} // Generate a BAM file per sample joining all per lane files if necessary - val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams) + val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs) // Final output list of processed bam files var cohortList: List[File] = List() // Simple progress report println("\nFound the following samples: ") - for ((sample, file) <- sampleBamFiles) + for ((sample, file) <- sampleBAMFiles) println("\t" + sample + " -> " + file) println("\n") @@ -237,7 +252,8 @@ class DataProcessingPipeline extends QScript { add(target(null, globalIntervals)) // Put each sample through the pipeline - for ((sample, bam) <- sampleBamFiles) { + for ((sample, sampleFile) <- sampleBAMFiles) { + val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile} // BAM files generated by the pipeline val cleanedBam = swapExt(bam, ".bam", ".clean.bam") @@ -254,17 +270,19 @@ class DataProcessingPipeline extends QScript { val preValidateLog = swapExt(bam, ".bam", ".pre.validation") val postValidateLog = swapExt(bam, ".bam", ".post.validation") + + //todo -- update the validation to work with .list files // Validation is an optional step for the BAM file generated after // alignment and the final bam file of the pipeline. - if (!noValidation) { - add(validate(bam, preValidateLog), - validate(recalBam, postValidateLog)) - } +// if (!noValidation) { +// add(validate(bam, preValidateLog), +// validate(recalBam, postValidateLog)) +// } if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) - add(target(bam, targetIntervals)) + add(target(sampleFile, targetIntervals)) - add(clean(bam, targetIntervals, cleanedBam), + add(clean(sampleFile, targetIntervals, cleanedBam), dedup(cleanedBam, dedupedBam, metricsFile), cov(dedupedBam, preRecalFile), recal(dedupedBam, preRecalFile, recalBam), @@ -318,7 +336,6 @@ class DataProcessingPipeline extends QScript { } case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input_file :+= inBams this.targetIntervals = tIntervals this.out = outBam @@ -373,7 +390,6 @@ class DataProcessingPipeline extends QScript { } case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inBam) this.output = outBam this.metrics = metricsFile @@ -383,7 +399,6 @@ class DataProcessingPipeline extends QScript { } case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = inBams this.output = outBam this.analysisName = queueLogDir + outBam + ".joinBams" @@ -391,7 +406,6 @@ class DataProcessingPipeline extends QScript { } case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs { - @Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai") this.input = List(inSam) this.output = outBam this.sortOrder = sortOrderP @@ -424,6 +438,14 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".rg" } + case class revert (inBam: File, outBam: File) extends RevertSam with ExternalCommonArgs { + this.output = outBam + this.input :+= inBam + this.analysisName = queueLogDir + outBam + "revert" + this.jobName = queueLogDir + outBam + ".revert" + + } + case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/AddOrReplaceReadGroups.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/AddOrReplaceReadGroups.scala index 2508f5776..6413317e6 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/AddOrReplaceReadGroups.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/AddOrReplaceReadGroups.scala @@ -21,7 +21,7 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav var output: File = _ @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) - var outputIndex: File = new File(output + ".bai") + var outputIndex: File = _ @Argument(doc="Read group ID", shortName = "id", fullName = "read_group_id", required = true) var RGID: String = _ @@ -44,6 +44,12 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav @Argument(doc = "Read group description", shortName = "ds", fullName = "read_group_description", required = false) var RGDS: String = "" + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + override def inputBams = input override def outputBam = output diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MarkDuplicates.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MarkDuplicates.scala index 6f006ffad..9489e8b07 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MarkDuplicates.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MarkDuplicates.scala @@ -21,7 +21,7 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand var output: File = _ @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) - var outputIndex: File = new File(output + ".bai") + var outputIndex: File = _ @Output(doc="File to write duplication metrics to", shortName = "out_metrics", fullName = "output_metrics_file", required = false) var metrics: File = new File(output + ".metrics") @@ -35,6 +35,13 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) var SORTING_COLLECTION_SIZE_RATIO: Double = -1 + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + override def inputBams = input override def outputBam = output this.sortOrder = null diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MergeSamFiles.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MergeSamFiles.scala index a7e74e1b5..a16c1eba2 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MergeSamFiles.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/MergeSamFiles.scala @@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard import org.broadinstitute.sting.commandline._ import java.io.File +import org.broadinstitute.sting.queue.QScript._ /* * Created by IntelliJ IDEA. @@ -21,7 +22,7 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL var output: File = _ @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) - var outputIndex: File = new File(output + ".bai") + var outputIndex: File = _ @Argument(doc="Merge the seqeunce dictionaries Default value: false. This option can be set to 'null' to clear the default value.", shortName = "merge_dict", fullName = "merge_sequence_dictionaries", required = false) var MERGE_SEQUENCE_DICTIONARIES: Boolean = false @@ -32,6 +33,13 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL @Argument(doc = "Comments to include in the merged output file's header.", shortName = "com", fullName = "comments", required = false) var COMMENT: String = "" + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + override def inputBams = input override def outputBam = output this.createIndex = Some(true) diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/RevertSam.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/RevertSam.scala index 33cd7ccaf..746ce609e 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/RevertSam.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/RevertSam.scala @@ -11,17 +11,17 @@ import java.io.File * Time: 10:35 AM */ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { - analysisName = "SortSam" - javaMainClass = "net.sf.picard.sam.SortSam" + analysisName = "RevertSam" + javaMainClass = "net.sf.picard.sam.RevertSam" @Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "The input SAM or BAM files to revert.") - var input: List[File] = _ + var input: List[File] = Nil @Output(shortName = "output", fullName = "output_bam_file", required = true, doc = "The reverted BAM or SAM output file.") var output: File = _ @Output(shortName = "out_index", fullName = "output_bam_index_file", required = false, doc = "The output bam index") - var outputIndex: File = new File(output + ".bai") + var outputIndex: File = _ @Argument(shortName = "roq", fullName = "restore_original_qualities", required = false, doc = "True to restore original qualities from the OQ field to the QUAL field if available.") var restoreOriginalQualities: Boolean = true @@ -33,12 +33,20 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF var removeAlignmentInformation: Boolean = true @Argument(shortName = "atc", fullName = "attributes_to_clear", required = false, doc = "When removing alignment information, the set of optional tags to remove.") - var attributesToClear: List[String] = _ + var attributesToClear: List[String] = Nil @Argument(shortName = "sa", fullName = "sample_alias", required = false, doc = "The sample alias to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.") var sampleAlias: String = null @Argument(shortName = "ln", fullName = "library_name", required = false, doc = "The library name to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.") + var libraryName: String = null + + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + override def inputBams = input override def outputBam = output @@ -48,5 +56,6 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF conditionalParameter(!removeDuplicateInformation, " REMOVE_DUPLICATE_INFORMATION=false") + conditionalParameter(!removeAlignmentInformation, " REMOVE_ALIGNMENT_INFORMATION=false") + conditionalParameter(!attributesToClear.isEmpty, repeat(" ATTRIBUTE_TO_CLEAR=", attributesToClear)) + - conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias) + conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias) + + conditionalParameter(libraryName != null, " LIBRARY_NAME=" + libraryName) } \ No newline at end of file diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SortSam.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SortSam.scala index cc26f7471..520d17a6a 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SortSam.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SortSam.scala @@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard import org.broadinstitute.sting.commandline._ import java.io.File +import org.broadinstitute.sting.queue.QScript._ /* * Created by IntelliJ IDEA. @@ -21,7 +22,15 @@ class SortSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFun var output: File = _ @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) - var outputIndex: File = new File(output + ".bai") + var outputIndex: File = _ + + override def freezeFieldValues() { + super.freezeFieldValues() + if (outputIndex == null && output != null) + outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") + } + + override def inputBams = input override def outputBam = output