BLASR BAMs and new BWA parameters

*Added the functions to turn a BLASR generated BAM file into a usable BAM file.
*Modified the bwa parameters according to test results from NA12878 pb2k dataset.
This commit is contained in:
Mauricio Carneiro 2011-08-24 17:02:44 -04:00
parent e3f5d7067a
commit 16caca0822
2 changed files with 43 additions and 28 deletions

View File

@ -4,9 +4,9 @@ import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.util.QScriptUtils import org.broadinstitute.sting.queue.util.QScriptUtils
import net.sf.samtools.SAMFileHeader.SortOrder import net.sf.samtools.SAMFileHeader.SortOrder
import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups}
import org.broadinstitute.sting.utils.exceptions.UserException import org.broadinstitute.sting.utils.exceptions.UserException
import org.broadinstitute.sting.commandline.Hidden import org.broadinstitute.sting.commandline.Hidden
import org.broadinstitute.sting.queue.extensions.picard.{ReorderSam, SortSam, AddOrReplaceReadGroups}
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -16,7 +16,7 @@ import org.broadinstitute.sting.commandline.Hidden
*/ */
class RecalibrateBaseQualities extends QScript { class PacbioProcessingPipeline extends QScript {
@Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true) @Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
var input: File = _ var input: File = _
@ -39,13 +39,16 @@ class RecalibrateBaseQualities extends QScript {
@Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false) @Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false)
var bwaPath: File = _ var bwaPath: File = _
@Input(doc="Input is a BLASR generated BAM file", shortName = "blasr", fullName="blasr_bam", required=false)
var BLASR_BAM: Boolean = false
@Hidden @Hidden
@Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false) @Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false)
var dbq: Int = 20 var dbq: Int = 20
@Hidden
@Input(shortName="bwastring", required=false)
var bwastring: String = ""
val queueLogDir: String = ".qlog/" val queueLogDir: String = ".qlog/"
@ -57,8 +60,6 @@ class RecalibrateBaseQualities extends QScript {
var USE_BWA: Boolean = false var USE_BWA: Boolean = false
println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName)
if (file.endsWith(".fasta") || file.endsWith(".fq")) { if (file.endsWith(".fasta") || file.endsWith(".fq")) {
if (bwaPath == null) { if (bwaPath == null) {
throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA"); throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA");
@ -69,28 +70,34 @@ class RecalibrateBaseQualities extends QScript {
// FASTA -> BAM steps // FASTA -> BAM steps
val alignedSam: File = file.getName + ".aligned.sam" val alignedSam: File = file.getName + ".aligned.sam"
val sortedBam: File = swapExt(alignedSam, ".sam", ".bam") val sortedBam: File = swapExt(alignedSam, ".sam", ".bam")
val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam")
val rgBam: File = swapExt(file, ".bam", ".rg.bam") val rgBam: File = swapExt(file, ".bam", ".rg.bam")
val bamBase = if (USE_BWA) {rgBam} else {file} val bamBase = if (USE_BWA) {rgBam} else {file}
// BAM Steps // BAM Steps
val mqBAM: File = swapExt(bamBase, ".bam", ".mq.bam")
val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv") val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv")
val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv") val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv")
val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam") val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam")
val path1: String = recalBam + ".before" val path1: String = recalBam + ".before"
val path2: String = recalBam + ".after" val path2: String = recalBam + ".after"
if (USE_BWA) { if (USE_BWA) {
add(align(file, alignedSam), add(align(file, alignedSam),
sortSam(alignedSam, sortedBam), sortSam(alignedSam, sortedBam),
addQuals(sortedBam, qualBam, dbq), addReadGroup(sortedBam, rgBam, sample))
addReadGroup(qualBam, rgBam, sample))
} }
add(cov(bamBase, recalFile1), else if (BLASR_BAM) {
recal(bamBase, recalFile1, recalBam), val reorderedBAM = swapExt(bamBase, ".bam", ".reordered.bam")
add(reorder(bamBase, reorderedBAM),
changeMQ(reorderedBAM, mqBAM))
}
val bam = if (BLASR_BAM) {mqBAM} else {bamBase}
add(cov(bam, recalFile1),
recal(bam, recalFile1, recalBam),
cov(recalBam, recalFile2), cov(recalBam, recalFile2),
analyzeCovariates(recalFile1, path1), analyzeCovariates(recalFile1, path1),
analyzeCovariates(recalFile2, path2)) analyzeCovariates(recalFile2, path2))
@ -110,13 +117,13 @@ class RecalibrateBaseQualities extends QScript {
case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs { case class align(@Input inFastq: File, @Output outSam: File) extends ExternalCommonArgs {
def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z10 -t8 " + reference + " " + inFastq + " > " + outSam def commandLine = bwaPath + " bwasw -b5 -q2 -r1 -z20 -t16 " + reference + " " + inFastq + " > " + outSam
this.memoryLimit = 8
this.analysisName = queueLogDir + outSam + ".bwa_sam_se" this.analysisName = queueLogDir + outSam + ".bwa_sam_se"
this.jobName = queueLogDir + outSam + ".bwa_sam_se" this.jobName = queueLogDir + outSam + ".bwa_sam_se"
} }
case class sortSam (@Input inSam: File, @Output outBam: File) extends SortSam with ExternalCommonArgs { case class sortSam (inSam: File, outBam: File) extends SortSam with ExternalCommonArgs {
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
this.input = List(inSam) this.input = List(inSam)
this.output = outBam this.output = outBam
this.sortOrder = SortOrder.coordinate this.sortOrder = SortOrder.coordinate
@ -124,10 +131,16 @@ class RecalibrateBaseQualities extends QScript {
this.jobName = queueLogDir + outBam + ".sortSam" this.jobName = queueLogDir + outBam + ".sortSam"
} }
case class addQuals(inBam: File, outBam: File, qual: Int) extends PrintReads with CommandLineGATKArgs { case class reorder (inSam: File, outSam: File) extends ReorderSam with ExternalCommonArgs {
this.input = List(inSam)
this.output = outSam
this.sortReference = reference
}
case class changeMQ(inBam: File, outBam: File) extends PrintReads with CommandLineGATKArgs {
this.input_file :+= inBam this.input_file :+= inBam
this.out = outBam this.out = outBam
this.DBQ = qual this.read_filter :+= "ReassignMappingQuality"
} }
case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs { case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs {
@ -145,6 +158,7 @@ class RecalibrateBaseQualities extends QScript {
} }
case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs { case class cov (inBam: File, outRecalFile: File) extends CountCovariates with CommandLineGATKArgs {
this.DBQ = dbq
this.knownSites :+= dbSNP this.knownSites :+= dbSNP
this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate") this.covariate ++= List("ReadGroupCovariate", "QualityScoreCovariate", "CycleCovariate", "DinucCovariate")
this.input_file :+= inBam this.input_file :+= inBam
@ -155,6 +169,7 @@ class RecalibrateBaseQualities extends QScript {
} }
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs { case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
this.DBQ = dbq
this.input_file :+= inBam this.input_file :+= inBam
this.recal_file = inRecalFile this.recal_file = inRecalFile
this.out = outBam this.out = outBam

View File

@ -11,7 +11,7 @@ import java.io.File
*/ */
class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
analysisName = "ReorderSam" analysisName = "ReorderSam"
javaMainClass = "net.sf.picard.sam.SortSam" javaMainClass = "net.sf.picard.sam.ReorderSam"
@Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true) @Input(doc="Input file (bam or sam) to extract reads from.", shortName = "input", fullName = "input_bam_files", required = true)
var input: List[File] = Nil var input: List[File] = Nil
@ -22,27 +22,27 @@ class ReorderSam extends org.broadinstitute.sting.queue.function.JavaCommandLine
@Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false) @Output(doc="The output bam index", shortName = "out_index", fullName = "output_bam_index_file", required = false)
var outputIndex: File = _ var outputIndex: File = _
@Argument(doc="Reference sequence to reorder reads to match.", shortName = "input", fullName = "input_bam_files", required = true) @Argument(doc="Reference sequence to reorder reads to match.", shortName = "ref", fullName = "sort_reference", required = true)
var reference: File = _ var sortReference: File = _
@Argument(doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs. By default, this tool requires a corresponding contig in the new reference for each read contig.", shortName = "aic", fullName = "allow_incomplete_concordance", required = false) @Argument(doc="If true, then allows only a partial overlap of the BAM contigs with the new reference sequence contigs. By default, this tool requires a corresponding contig in the new reference for each read contig.", shortName = "aic", fullName = "allow_incomplete_concordance", required = false)
var ALLOW_INCOMPLETE_DICT_CONCORDANCE: Boolean = false var ALLOW_INCOMPLETE_DICT_CONCORDANCE: Boolean = _
@Argument(doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length. Highly dangerous, only use if you know what you are doing.", shortName = "acld", fullName = "allow_contig_length_discordance", required = false) @Argument(doc="If true, then permits mapping from a read contig to a new reference contig with the same name but a different length. Highly dangerous, only use if you know what you are doing.", shortName = "acld", fullName = "allow_contig_length_discordance", required = false)
var ALLOW_CONTIG_LENGTH_DISCORDANCE: Boolean = false var ALLOW_CONTIG_LENGTH_DISCORDANCE: Boolean = _
override def freezeFieldValues() { override def freezeFieldValues() {
super.freezeFieldValues() super.freezeFieldValues()
if (outputIndex == null && output != null && output.toString.endsWith(".bam")) if (outputIndex == null && output != null)
outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai") outputIndex = new File(output.getName.stripSuffix(".bam") + ".bai")
} }
override def inputBams = input override def inputBams = input
override def outputBam = output override def outputBam = output
this.createIndex = Some(outputIndex != null) this.createIndex = Some(true)
this.sortOrder = null this.sortOrder = null
override def commandLine = super.commandLine + override def commandLine = super.commandLine +
" REFERENCE=" + reference + " REFERENCE=" + sortReference +
conditionalParameter (ALLOW_INCOMPLETE_DICT_CONCORDANCE, " ALLOW_INCOMPLETE_DICT_CONCORDANCE=true") optional(" ALLOW_INCOMPLETE_DICT_CONCORDANCE=", ALLOW_INCOMPLETE_DICT_CONCORDANCE)
conditionalParameter (ALLOW_CONTIG_LENGTH_DISCORDANCE, " ALLOW_CONTIG_LENGTH_DISCORDANCE=true") optional(" ALLOW_CONTIG_LENGTH_DISCORDANCE=", ALLOW_CONTIG_LENGTH_DISCORDANCE)
} }