added BWA SW alignment
The pipeline now accepts fasta/fastq files and aligns them using BWA SW, adds default basequalities, creates read groups and performs BQSR.
This commit is contained in:
parent
bd1cf4c7bc
commit
22d2563823
|
|
@ -5,6 +5,8 @@ import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.util.QScriptUtils
|
import org.broadinstitute.sting.queue.util.QScriptUtils
|
||||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||||
import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups}
|
import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceReadGroups}
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -16,7 +18,7 @@ import org.broadinstitute.sting.queue.extensions.picard.{SortSam, AddOrReplaceRe
|
||||||
|
|
||||||
class RecalibrateBaseQualities extends QScript {
|
class RecalibrateBaseQualities extends QScript {
|
||||||
|
|
||||||
@Input(doc="input FASTA file, BAM file - or list of FASTA/BAM files. ", shortName="i", required=true)
|
@Input(doc="input FASTA/FASTQ/BAM file - or list of FASTA/FASTQ/BAM files. ", shortName="i", required=true)
|
||||||
var input: File = _
|
var input: File = _
|
||||||
|
|
||||||
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
|
@Input(doc="path to R resources folder inside the Sting repository", fullName="path_to_r", shortName="r", required=true)
|
||||||
|
|
@ -28,51 +30,67 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
@Input(doc="dbsnp VCF file to use ", shortName="D", required=true)
|
@Input(doc="dbsnp VCF file to use ", shortName="D", required=true)
|
||||||
var dbSNP: File = _
|
var dbSNP: File = _
|
||||||
|
|
||||||
@Input(doc="Default base qualities. Overrides the file's original base qualities with given value. Must be used if the file does not have base qualities." , shortName = "dbq", required=false)
|
@Input(doc="Number of jobs to scatter/gather. Default: 0." , shortName = "sg", required=false)
|
||||||
var dbq: Int = -1
|
var threads: Int = 0
|
||||||
|
|
||||||
@Input(doc="Number of jobs to scatter/gather. Default is the number of contigs in the dataset" , shortName = "sg", required=false)
|
@Input(doc="Sample Name to fill in the Read Group information (only necessary if using fasta/fastq)" , shortName = "sn", required=false)
|
||||||
var threads: Int = -1
|
var sample: String = "NA"
|
||||||
|
|
||||||
@Input(doc="Sample Name" , shortName = "sn", required=false)
|
@Input(doc="The path to the binary of bwa to align fasta/fastq files", fullName="path_to_bwa", shortName="bwa", required=false)
|
||||||
var sample: String = ""
|
|
||||||
|
|
||||||
@Input(doc="The path to the binary of bwa (usually BAM files have already been mapped - but if you want to remap this is the option)", fullName="path_to_bwa", shortName="bwa", required=false)
|
|
||||||
var bwaPath: File = _
|
var bwaPath: File = _
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Input(doc="The default base qualities to use before recalibration. Default is Q20 (should be good for every dataset)." , shortName = "dbq", required=false)
|
||||||
|
var dbq: Int = 20
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
var nContigs: Int = 0
|
|
||||||
var ADD_BASE_QUALITIES = false
|
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
if (dbq >= 0)
|
val fileList: List[File] = QScriptUtils.createListFromFile(input)
|
||||||
ADD_BASE_QUALITIES = true
|
|
||||||
|
|
||||||
val fileList = QScriptUtils.createListFromFile(input)
|
for (file: File <- fileList) {
|
||||||
nContigs = if (threads >= 0) {threads} else {QScriptUtils.getNumberOfContigs(fileList(0))}
|
|
||||||
|
|
||||||
for (file <- fileList) {
|
var USE_BWA: Boolean = false
|
||||||
val qualBam: File = swapExt(file, ".bam", ".quals.bam")
|
|
||||||
val rgBam: File = if (ADD_BASE_QUALITIES) {swapExt(file, ".bam", ".rg.bam")} else {file}
|
println("DEBUG: processing " + file + "\nDEBUG: name -- " + file.getName)
|
||||||
val recalFile1: File = swapExt(file, ".bam", ".recal1.csv")
|
|
||||||
val recalFile2: File = swapExt(file, ".bam", ".recal2.csv")
|
if (file.endsWith(".fasta") || file.endsWith(".fq")) {
|
||||||
val recalBam: File = swapExt(file, ".bam", ".recal.bam")
|
if (bwaPath == null) {
|
||||||
|
throw new UserException("You provided a fasta/fastq file but didn't provide the path for BWA");
|
||||||
|
}
|
||||||
|
USE_BWA = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// FASTA -> BAM steps
|
||||||
|
val alignedSam: File = file.getName + ".aligned.sam"
|
||||||
|
val sortedBam: File = swapExt(alignedSam, ".sam", ".bam")
|
||||||
|
val qualBam: File = swapExt(sortedBam, ".bam", ".q.bam")
|
||||||
|
val rgBam: File = swapExt(file, ".bam", ".rg.bam")
|
||||||
|
|
||||||
|
val bamBase = if (USE_BWA) {rgBam} else {file}
|
||||||
|
|
||||||
|
// BAM Steps
|
||||||
|
val recalFile1: File = swapExt(bamBase, ".bam", ".recal1.csv")
|
||||||
|
val recalFile2: File = swapExt(bamBase, ".bam", ".recal2.csv")
|
||||||
|
val recalBam: File = swapExt(bamBase, ".bam", ".recal.bam")
|
||||||
val path1: String = recalBam + ".before"
|
val path1: String = recalBam + ".before"
|
||||||
val path2: String = recalBam + ".after"
|
val path2: String = recalBam + ".after"
|
||||||
|
|
||||||
|
|
||||||
if (ADD_BASE_QUALITIES) {
|
if (USE_BWA) {
|
||||||
add(addQuals(file, qualBam, dbq),
|
add(align(file, alignedSam),
|
||||||
|
sortSam(alignedSam, sortedBam),
|
||||||
|
addQuals(sortedBam, qualBam, dbq),
|
||||||
addReadGroup(qualBam, rgBam, sample))
|
addReadGroup(qualBam, rgBam, sample))
|
||||||
}
|
}
|
||||||
|
|
||||||
add(cov(rgBam, recalFile1),
|
add(cov(bamBase, recalFile1),
|
||||||
recal(rgBam, recalFile1, recalBam),
|
recal(bamBase, recalFile1, recalBam),
|
||||||
cov(recalBam, recalFile2),
|
cov(recalBam, recalFile2),
|
||||||
analyzeCovariates(recalFile1, path1),
|
analyzeCovariates(recalFile1, path1),
|
||||||
analyzeCovariates(recalFile2, path2))
|
analyzeCovariates(recalFile2, path2))
|
||||||
|
|
@ -86,7 +104,7 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
this.isIntermediate = true
|
this.isIntermediate = true
|
||||||
}
|
}
|
||||||
|
|
||||||
trait CommandLineGATKArgs extends CommandLineGATK {
|
trait CommandLineGATKArgs extends CommandLineGATK with ExternalCommonArgs {
|
||||||
this.reference_sequence = reference
|
this.reference_sequence = reference
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,7 +130,7 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
this.DBQ = qual
|
this.DBQ = qual
|
||||||
}
|
}
|
||||||
|
|
||||||
case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups {
|
case class addReadGroup (inBam: File, outBam: File, sample: String) extends AddOrReplaceReadGroups with ExternalCommonArgs {
|
||||||
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
|
@Output(doc="output bai file") var bai = swapExt(outBam, ".bam", ".bai")
|
||||||
this.input = List(inBam)
|
this.input = List(inBam)
|
||||||
this.output = outBam
|
this.output = outBam
|
||||||
|
|
@ -133,7 +151,7 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
this.recal_file = outRecalFile
|
this.recal_file = outRecalFile
|
||||||
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
this.analysisName = queueLogDir + outRecalFile + ".covariates"
|
||||||
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
this.jobName = queueLogDir + outRecalFile + ".covariates"
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = threads
|
||||||
}
|
}
|
||||||
|
|
||||||
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
case class recal (inBam: File, inRecalFile: File, outBam: File) extends TableRecalibration with CommandLineGATKArgs {
|
||||||
|
|
@ -143,7 +161,7 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
this.isIntermediate = false
|
this.isIntermediate = false
|
||||||
this.analysisName = queueLogDir + outBam + ".recalibration"
|
this.analysisName = queueLogDir + outBam + ".recalibration"
|
||||||
this.jobName = queueLogDir + outBam + ".recalibration"
|
this.jobName = queueLogDir + outBam + ".recalibration"
|
||||||
this.scatterCount = nContigs
|
this.scatterCount = threads
|
||||||
}
|
}
|
||||||
|
|
||||||
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
case class analyzeCovariates (inRecalFile: File, outPath: String) extends AnalyzeCovariates {
|
||||||
|
|
|
||||||
|
|
@ -22,15 +22,15 @@ object QScriptUtils {
|
||||||
* to have empty lines and comment lines (lines starting with #).
|
* to have empty lines and comment lines (lines starting with #).
|
||||||
*/
|
*/
|
||||||
def createListFromFile(in: File):List[File] = {
|
def createListFromFile(in: File):List[File] = {
|
||||||
// If the file provided ends with .bam, it is not a bam list, we treat it as a single file.
|
// If the file provided ends with .bam, .fasta or .fq, it is not a bam list, we treat it as a single file.
|
||||||
// and return a list with only this file.
|
// and return a list with only this file.
|
||||||
if (in.toString.endsWith(".bam"))
|
if (in.toString.endsWith(".bam") || in.toString.endsWith(".fasta") || in.toString.endsWith(".fq"))
|
||||||
return List(in)
|
return List(in)
|
||||||
|
|
||||||
var list: List[File] = List()
|
var list: List[File] = List()
|
||||||
for (bam <- fromFile(in).getLines)
|
for (file <- fromFile(in).getLines)
|
||||||
if (!bam.startsWith("#") && !bam.isEmpty )
|
if (!file.startsWith("#") && !file.isEmpty )
|
||||||
list :+= new File(bam.trim())
|
list :+= new File(file.trim())
|
||||||
list.sortWith(_.compareTo(_) < 0)
|
list.sortWith(_.compareTo(_) < 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue