Fixed bai removal on picard tools

BAM index files were not being deleted because picard replaces the name of the file with bai instead of appending to it.
This commit is contained in:
Mauricio Carneiro 2011-08-22 14:42:29 -04:00
parent 8aed151a71
commit 04d8bcaf19
6 changed files with 91 additions and 30 deletions

View File

@ -142,9 +142,9 @@ class DataProcessingPipeline extends QScript {
println (f) println (f)
println() println()
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam") val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
sampleBamFiles(sample) = sampleFileName sampleBamFiles(sample) = sampleFileName
add(joinBams(flist, sampleFileName)) add(writeList(flist, sampleFileName))
} }
println("*** INPUT FILES ***\n\n") println("*** INPUT FILES ***\n\n")
@ -170,18 +170,20 @@ class DataProcessingPipeline extends QScript {
var realignedBams: List[File] = List() var realignedBams: List[File] = List()
var index = 1 var index = 1
for (bam <- bams) { for (bam <- bams) {
val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" ) // first revert the BAM file to the original qualities
val revertedBAM = revertBAM(bam)
val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" )
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam")
val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam")
if (useBWAse) { if (useBWAse) {
add(bwa_aln_se(bam, saiFile1), add(bwa_aln_se(revertedBAM, saiFile1),
bwa_sam_se(bam, saiFile1, realignedSamFile)) bwa_sam_se(revertedBAM, saiFile1, realignedSamFile))
} }
else { else {
add(sortSam(bam, readSortedBam, SortOrder.queryname), add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname),
bwa_aln_pe(readSortedBam, saiFile1, 1), bwa_aln_pe(readSortedBam, saiFile1, 1),
bwa_aln_pe(readSortedBam, saiFile2, 2), bwa_aln_pe(readSortedBam, saiFile2, 2),
bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile)) bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile))
@ -203,6 +205,19 @@ class DataProcessingPipeline extends QScript {
ConsensusDeterminationModel.USE_READS ConsensusDeterminationModel.USE_READS
} }
def revertBams(bams: List[File]): List[File] = {
var revertedBAMList: List[File] = List()
for (bam <- bams)
revertedBAMList :+= revertBAM(bam)
return revertedBAMList
}
def revertBAM(bam: File): File = {
val revertedBAM = swapExt(bam, ".bam", ".reverted.bam")
add(revert(bam, revertedBAM))
return revertedBAM
}
/**************************************************************************** /****************************************************************************
* Main script * Main script
@ -217,17 +232,17 @@ class DataProcessingPipeline extends QScript {
val bams = QScriptUtils.createListFromFile(input) val bams = QScriptUtils.createListFromFile(input)
nContigs = QScriptUtils.getNumberOfContigs(bams(0)) nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams} val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
// Generate a BAM file per sample joining all per lane files if necessary // Generate a BAM file per sample joining all per lane files if necessary
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams) val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs)
// Final output list of processed bam files // Final output list of processed bam files
var cohortList: List[File] = List() var cohortList: List[File] = List()
// Simple progress report // Simple progress report
println("\nFound the following samples: ") println("\nFound the following samples: ")
for ((sample, file) <- sampleBamFiles) for ((sample, file) <- sampleBAMFiles)
println("\t" + sample + " -> " + file) println("\t" + sample + " -> " + file)
println("\n") println("\n")
@ -237,7 +252,8 @@ class DataProcessingPipeline extends QScript {
add(target(null, globalIntervals)) add(target(null, globalIntervals))
// Put each sample through the pipeline // Put each sample through the pipeline
for ((sample, bam) <- sampleBamFiles) { for ((sample, sampleFile) <- sampleBAMFiles) {
val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile}
// BAM files generated by the pipeline // BAM files generated by the pipeline
val cleanedBam = swapExt(bam, ".bam", ".clean.bam") val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
@ -254,17 +270,19 @@ class DataProcessingPipeline extends QScript {
val preValidateLog = swapExt(bam, ".bam", ".pre.validation") val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
val postValidateLog = swapExt(bam, ".bam", ".post.validation") val postValidateLog = swapExt(bam, ".bam", ".post.validation")
//todo -- update the validation to work with .list files
// Validation is an optional step for the BAM file generated after // Validation is an optional step for the BAM file generated after
// alignment and the final bam file of the pipeline. // alignment and the final bam file of the pipeline.
if (!noValidation) { // if (!noValidation) {
add(validate(bam, preValidateLog), // add(validate(bam, preValidateLog),
validate(recalBam, postValidateLog)) // validate(recalBam, postValidateLog))
} // }
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY) if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
add(target(bam, targetIntervals)) add(target(sampleFile, targetIntervals))
add(clean(bam, targetIntervals, cleanedBam), add(clean(sampleFile, targetIntervals, cleanedBam),
dedup(cleanedBam, dedupedBam, metricsFile), dedup(cleanedBam, dedupedBam, metricsFile),
cov(dedupedBam, preRecalFile), cov(dedupedBam, preRecalFile),
recal(dedupedBam, preRecalFile, recalBam), recal(dedupedBam, preRecalFile, recalBam),
@ -318,7 +336,6 @@ class DataProcessingPipeline extends QScript {
} }
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs { case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input_file :+= inBams this.input_file :+= inBams
this.targetIntervals = tIntervals this.targetIntervals = tIntervals
this.out = outBam this.out = outBam
@ -373,7 +390,6 @@ class DataProcessingPipeline extends QScript {
} }
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs { case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inBam) this.input = List(inBam)
this.output = outBam this.output = outBam
this.metrics = metricsFile this.metrics = metricsFile
@ -383,7 +399,6 @@ class DataProcessingPipeline extends QScript {
} }
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs { case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = inBams this.input = inBams
this.output = outBam this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams" this.analysisName = queueLogDir + outBam + ".joinBams"
@ -391,7 +406,6 @@ class DataProcessingPipeline extends QScript {
} }
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs { case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inSam) this.input = List(inSam)
this.output = outBam this.output = outBam
this.sortOrder = sortOrderP this.sortOrder = sortOrderP
@ -424,6 +438,14 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".rg" this.jobName = queueLogDir + outBam + ".rg"
} }
case class revert (inBam: File, outBam: File) extends RevertSam with ExternalCommonArgs {
this.output = outBam
this.input :+= inBam
this.analysisName = queueLogDir + outBam + "revert"
this.jobName = queueLogDir + outBam + ".revert"
}
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam @Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file") var sai = outSai @Output(doc="output sai file") var sai = outSai

View File

@ -21,7 +21,7 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav
var output: File = _ var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai") var outputIndex: File = _
@Argument(doc="Read group ID", shortName = "id", fullName = "read_group_id", required = true) @Argument(doc="Read group ID", shortName = "id", fullName = "read_group_id", required = true)
var RGID: String = _ var RGID: String = _
@ -44,6 +44,12 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav
@Argument(doc = "Read group description", shortName = "ds", fullName = "read_group_description", required = false) @Argument(doc = "Read group description", shortName = "ds", fullName = "read_group_description", required = false)
var RGDS: String = "" var RGDS: String = ""
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output

View File

@ -21,7 +21,7 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand
var output: File = _ var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai") var outputIndex: File = _
@Output(doc="File to write duplication metrics to", shortName = "out_metrics", fullName = "output_metrics_file", required = false) @Output(doc="File to write duplication metrics to", shortName = "out_metrics", fullName = "output_metrics_file", required = false)
var metrics: File = new File(output + ".metrics") var metrics: File = new File(output + ".metrics")
@ -35,6 +35,13 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand
@Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false) @Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false)
var SORTING_COLLECTION_SIZE_RATIO: Double = -1 var SORTING_COLLECTION_SIZE_RATIO: Double = -1
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output
this.sortOrder = null this.sortOrder = null

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.commandline._
import java.io.File import java.io.File
import org.broadinstitute.sting.queue.QScript._
/* /*
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -21,7 +22,7 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL
var output: File = _ var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai") var outputIndex: File = _
@Argument(doc="Merge the seqeunce dictionaries Default value: false. This option can be set to 'null' to clear the default value.", shortName = "merge_dict", fullName = "merge_sequence_dictionaries", required = false) @Argument(doc="Merge the seqeunce dictionaries Default value: false. This option can be set to 'null' to clear the default value.", shortName = "merge_dict", fullName = "merge_sequence_dictionaries", required = false)
var MERGE_SEQUENCE_DICTIONARIES: Boolean = false var MERGE_SEQUENCE_DICTIONARIES: Boolean = false
@ -32,6 +33,13 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL
@Argument(doc = "Comments to include in the merged output file's header.", shortName = "com", fullName = "comments", required = false) @Argument(doc = "Comments to include in the merged output file's header.", shortName = "com", fullName = "comments", required = false)
var COMMENT: String = "" var COMMENT: String = ""
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output
this.createIndex = Some(true) this.createIndex = Some(true)

View File

@ -11,17 +11,17 @@ import java.io.File
* Time: 10:35 AM * Time: 10:35 AM
*/ */
class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
analysisName = "SortSam" analysisName = "RevertSam"
javaMainClass = "net.sf.picard.sam.SortSam" javaMainClass = "net.sf.picard.sam.RevertSam"
@Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "The input SAM or BAM files to revert.") @Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "The input SAM or BAM files to revert.")
var input: List[File] = _ var input: List[File] = Nil
@Output(shortName = "output", fullName = "output_bam_file", required = true, doc = "The reverted BAM or SAM output file.") @Output(shortName = "output", fullName = "output_bam_file", required = true, doc = "The reverted BAM or SAM output file.")
var output: File = _ var output: File = _
@Output(shortName = "out_index", fullName = "output_bam_index_file", required = false, doc = "The output bam index") @Output(shortName = "out_index", fullName = "output_bam_index_file", required = false, doc = "The output bam index")
var outputIndex: File = new File(output + ".bai") var outputIndex: File = _
@Argument(shortName = "roq", fullName = "restore_original_qualities", required = false, doc = "True to restore original qualities from the OQ field to the QUAL field if available.") @Argument(shortName = "roq", fullName = "restore_original_qualities", required = false, doc = "True to restore original qualities from the OQ field to the QUAL field if available.")
var restoreOriginalQualities: Boolean = true var restoreOriginalQualities: Boolean = true
@ -33,12 +33,20 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
var removeAlignmentInformation: Boolean = true var removeAlignmentInformation: Boolean = true
@Argument(shortName = "atc", fullName = "attributes_to_clear", required = false, doc = "When removing alignment information, the set of optional tags to remove.") @Argument(shortName = "atc", fullName = "attributes_to_clear", required = false, doc = "When removing alignment information, the set of optional tags to remove.")
var attributesToClear: List[String] = _ var attributesToClear: List[String] = Nil
@Argument(shortName = "sa", fullName = "sample_alias", required = false, doc = "The sample alias to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.") @Argument(shortName = "sa", fullName = "sample_alias", required = false, doc = "The sample alias to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.")
var sampleAlias: String = null var sampleAlias: String = null
@Argument(shortName = "ln", fullName = "library_name", required = false, doc = "The library name to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.") @Argument(shortName = "ln", fullName = "library_name", required = false, doc = "The library name to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.")
var libraryName: String = null
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output
@ -48,5 +56,6 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
conditionalParameter(!removeDuplicateInformation, " REMOVE_DUPLICATE_INFORMATION=false") + conditionalParameter(!removeDuplicateInformation, " REMOVE_DUPLICATE_INFORMATION=false") +
conditionalParameter(!removeAlignmentInformation, " REMOVE_ALIGNMENT_INFORMATION=false") + conditionalParameter(!removeAlignmentInformation, " REMOVE_ALIGNMENT_INFORMATION=false") +
conditionalParameter(!attributesToClear.isEmpty, repeat(" ATTRIBUTE_TO_CLEAR=", attributesToClear)) + conditionalParameter(!attributesToClear.isEmpty, repeat(" ATTRIBUTE_TO_CLEAR=", attributesToClear)) +
conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias) conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias) +
conditionalParameter(libraryName != null, " LIBRARY_NAME=" + libraryName)
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._ import org.broadinstitute.sting.commandline._
import java.io.File import java.io.File
import org.broadinstitute.sting.queue.QScript._
/* /*
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -21,7 +22,15 @@ class SortSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFun
var output: File = _ var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai") var outputIndex: File = _
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output