Fixed bai removal on picard tools

BAM index files were not being deleted because picard replaces the name of the file with bai instead of appending to it.
This commit is contained in:
Mauricio Carneiro 2011-08-22 14:42:29 -04:00
parent 8aed151a71
commit 04d8bcaf19
6 changed files with 91 additions and 30 deletions

View File

@ -142,9 +142,9 @@ class DataProcessingPipeline extends QScript {
println (f)
println()
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".bam")
val sampleFileName = new File(qscript.outputDir + qscript.projectName + "." + sample + ".list")
sampleBamFiles(sample) = sampleFileName
add(joinBams(flist, sampleFileName))
add(writeList(flist, sampleFileName))
}
println("*** INPUT FILES ***\n\n")
@ -170,18 +170,20 @@ class DataProcessingPipeline extends QScript {
var realignedBams: List[File] = List()
var index = 1
for (bam <- bams) {
val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" )
// first revert the BAM file to the original qualities
val revertedBAM = revertBAM(bam)
val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" )
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam")
val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam")
if (useBWAse) {
add(bwa_aln_se(bam, saiFile1),
bwa_sam_se(bam, saiFile1, realignedSamFile))
add(bwa_aln_se(revertedBAM, saiFile1),
bwa_sam_se(revertedBAM, saiFile1, realignedSamFile))
}
else {
add(sortSam(bam, readSortedBam, SortOrder.queryname),
add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname),
bwa_aln_pe(readSortedBam, saiFile1, 1),
bwa_aln_pe(readSortedBam, saiFile2, 2),
bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile))
@ -203,6 +205,19 @@ class DataProcessingPipeline extends QScript {
ConsensusDeterminationModel.USE_READS
}
def revertBams(bams: List[File]): List[File] = {
var revertedBAMList: List[File] = List()
for (bam <- bams)
revertedBAMList :+= revertBAM(bam)
return revertedBAMList
}
def revertBAM(bam: File): File = {
val revertedBAM = swapExt(bam, ".bam", ".reverted.bam")
add(revert(bam, revertedBAM))
return revertedBAM
}
/****************************************************************************
* Main script
@ -217,17 +232,17 @@ class DataProcessingPipeline extends QScript {
val bams = QScriptUtils.createListFromFile(input)
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams)}
// Generate a BAM file per sample joining all per lane files if necessary
val sampleBamFiles: Map[String, File] = createSampleFiles(bams, realignedBams)
val sampleBAMFiles: Map[String, File] = createSampleFiles(bams, realignedBAMs)
// Final output list of processed bam files
var cohortList: List[File] = List()
// Simple progress report
println("\nFound the following samples: ")
for ((sample, file) <- sampleBamFiles)
for ((sample, file) <- sampleBAMFiles)
println("\t" + sample + " -> " + file)
println("\n")
@ -237,7 +252,8 @@ class DataProcessingPipeline extends QScript {
add(target(null, globalIntervals))
// Put each sample through the pipeline
for ((sample, bam) <- sampleBamFiles) {
for ((sample, sampleFile) <- sampleBAMFiles) {
val bam = if (sampleFile.endsWith(".list")) {swapExt(sampleFile, ".list", ".bam")} else {sampleFile}
// BAM files generated by the pipeline
val cleanedBam = swapExt(bam, ".bam", ".clean.bam")
@ -254,17 +270,19 @@ class DataProcessingPipeline extends QScript {
val preValidateLog = swapExt(bam, ".bam", ".pre.validation")
val postValidateLog = swapExt(bam, ".bam", ".post.validation")
//todo -- update the validation to work with .list files
// Validation is an optional step for the BAM file generated after
// alignment and the final bam file of the pipeline.
if (!noValidation) {
add(validate(bam, preValidateLog),
validate(recalBam, postValidateLog))
}
// if (!noValidation) {
// add(validate(bam, preValidateLog),
// validate(recalBam, postValidateLog))
// }
if (cleaningModel != ConsensusDeterminationModel.KNOWNS_ONLY)
add(target(bam, targetIntervals))
add(target(sampleFile, targetIntervals))
add(clean(bam, targetIntervals, cleanedBam),
add(clean(sampleFile, targetIntervals, cleanedBam),
dedup(cleanedBam, dedupedBam, metricsFile),
cov(dedupedBam, preRecalFile),
recal(dedupedBam, preRecalFile, recalBam),
@ -318,7 +336,6 @@ class DataProcessingPipeline extends QScript {
}
case class clean (inBams: File, tIntervals: File, outBam: File) extends IndelRealigner with CommandLineGATKArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input_file :+= inBams
this.targetIntervals = tIntervals
this.out = outBam
@ -373,7 +390,6 @@ class DataProcessingPipeline extends QScript {
}
case class dedup (inBam: File, outBam: File, metricsFile: File) extends MarkDuplicates with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inBam)
this.output = outBam
this.metrics = metricsFile
@ -383,7 +399,6 @@ class DataProcessingPipeline extends QScript {
}
case class joinBams (inBams: List[File], outBam: File) extends MergeSamFiles with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = inBams
this.output = outBam
this.analysisName = queueLogDir + outBam + ".joinBams"
@ -391,7 +406,6 @@ class DataProcessingPipeline extends QScript {
}
case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inSam)
this.output = outBam
this.sortOrder = sortOrderP
@ -424,6 +438,14 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".rg"
}
case class revert (inBam: File, outBam: File) extends RevertSam with ExternalCommonArgs {
this.output = outBam
this.input :+= inBam
this.analysisName = queueLogDir + outBam + "revert"
this.jobName = queueLogDir + outBam + ".revert"
}
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="bam file to be aligned") var bam = inBam
@Output(doc="output sai file") var sai = outSai

View File

@ -21,7 +21,7 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav
var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai")
var outputIndex: File = _
@Argument(doc="Read group ID", shortName = "id", fullName = "read_group_id", required = true)
var RGID: String = _
@ -44,6 +44,12 @@ class AddOrReplaceReadGroups extends org.broadinstitute.sting.queue.function.Jav
@Argument(doc = "Read group description", shortName = "ds", fullName = "read_group_description", required = false)
var RGDS: String = ""
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input
override def outputBam = output

View File

@ -21,7 +21,7 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand
var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai")
var outputIndex: File = _
@Output(doc="File to write duplication metrics to", shortName = "out_metrics", fullName = "output_metrics_file", required = false)
var metrics: File = new File(output + ".metrics")
@ -35,6 +35,13 @@ class MarkDuplicates extends org.broadinstitute.sting.queue.function.JavaCommand
@Argument(doc = "This number, plus the maximum RAM available to the JVM, determine the memory footprint used by some of the sorting collections. If you are running out of memory, try reducing this number.", shortName = "sorting_ratio", fullName = "sorting_collection_size_ratio", required = false)
var SORTING_COLLECTION_SIZE_RATIO: Double = -1
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input
override def outputBam = output
this.sortOrder = null

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
import org.broadinstitute.sting.queue.QScript._
/*
* Created by IntelliJ IDEA.
@ -21,7 +22,7 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL
var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai")
var outputIndex: File = _
@Argument(doc="Merge the seqeunce dictionaries Default value: false. This option can be set to 'null' to clear the default value.", shortName = "merge_dict", fullName = "merge_sequence_dictionaries", required = false)
var MERGE_SEQUENCE_DICTIONARIES: Boolean = false
@ -32,6 +33,13 @@ class MergeSamFiles extends org.broadinstitute.sting.queue.function.JavaCommandL
@Argument(doc = "Comments to include in the merged output file's header.", shortName = "com", fullName = "comments", required = false)
var COMMENT: String = ""
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input
override def outputBam = output
this.createIndex = Some(true)

View File

@ -11,17 +11,17 @@ import java.io.File
* Time: 10:35 AM
*/
class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
analysisName = "SortSam"
javaMainClass = "net.sf.picard.sam.SortSam"
analysisName = "RevertSam"
javaMainClass = "net.sf.picard.sam.RevertSam"
@Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "The input SAM or BAM files to revert.")
var input: List[File] = _
var input: List[File] = Nil
@Output(shortName = "output", fullName = "output_bam_file", required = true, doc = "The reverted BAM or SAM output file.")
var output: File = _
@Output(shortName = "out_index", fullName = "output_bam_index_file", required = false, doc = "The output bam index")
var outputIndex: File = new File(output + ".bai")
var outputIndex: File = _
@Argument(shortName = "roq", fullName = "restore_original_qualities", required = false, doc = "True to restore original qualities from the OQ field to the QUAL field if available.")
var restoreOriginalQualities: Boolean = true
@ -33,12 +33,20 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
var removeAlignmentInformation: Boolean = true
@Argument(shortName = "atc", fullName = "attributes_to_clear", required = false, doc = "When removing alignment information, the set of optional tags to remove.")
var attributesToClear: List[String] = _
var attributesToClear: List[String] = Nil
@Argument(shortName = "sa", fullName = "sample_alias", required = false, doc = "The sample alias to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.")
var sampleAlias: String = null
@Argument(shortName = "ln", fullName = "library_name", required = false, doc = "The library name to use in the reverted output file. This will override the existing sample alias in the file and is used only if all the read groups in the input file have the same sample alias.")
var libraryName: String = null
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input
override def outputBam = output
@ -48,5 +56,6 @@ class RevertSam extends org.broadinstitute.sting.queue.function.JavaCommandLineF
conditionalParameter(!removeDuplicateInformation, " REMOVE_DUPLICATE_INFORMATION=false") +
conditionalParameter(!removeAlignmentInformation, " REMOVE_ALIGNMENT_INFORMATION=false") +
conditionalParameter(!attributesToClear.isEmpty, repeat(" ATTRIBUTE_TO_CLEAR=", attributesToClear)) +
conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias)
conditionalParameter(sampleAlias != null, " SAMPLE_ALIAS=" + sampleAlias) +
conditionalParameter(libraryName != null, " LIBRARY_NAME=" + libraryName)
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
import org.broadinstitute.sting.queue.QScript._
/*
* Created by IntelliJ IDEA.
@ -21,7 +22,15 @@ class SortSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFun
var output: File = _
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = new File(output + ".bai")
var outputIndex: File = _
override def freezeFieldValues() {
super.freezeFieldValues()
if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
}
override def inputBams = input
override def outputBam = output