Added BWASW support to the pipeline

Data Processing Pipeline can now use BWASW for realigning the reads. Useful for Ion Torrent data.
This commit is contained in:
Mauricio Carneiro 2011-10-20 18:36:28 -04:00
parent 093cd95c5d
commit c9d8b22092
3 changed files with 109 additions and 9 deletions

View File

@ -65,6 +65,9 @@ class DataProcessingPipeline extends QScript {
@Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false)
var useBWApe: Boolean = false
@Input(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false)
var useBWAsw: Boolean = false
@Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false)
var bwaThreads: Int = 1
@ -162,22 +165,28 @@ class DataProcessingPipeline extends QScript {
var index = 1
for (bam <- bams) {
// first revert the BAM file to the original qualities
val revertedBAM = revertBAM(bam, true)
val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" )
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam")
val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam")
if (useBWAse) {
val revertedBAM = revertBAM(bam, true)
add(bwa_aln_se(revertedBAM, saiFile1),
bwa_sam_se(revertedBAM, saiFile1, realignedSamFile))
}
else {
add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname),
bwa_aln_pe(readSortedBam, saiFile1, 1),
bwa_aln_pe(readSortedBam, saiFile2, 2),
bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile))
else if (useBWApe) {
val revertedBAM = revertBAM(bam, true)
add(bwa_aln_pe(revertedBAM, saiFile1, 1),
bwa_aln_pe(revertedBAM, saiFile2, 2),
bwa_sam_pe(revertedBAM, saiFile1, saiFile2, realignedSamFile))
}
else if (useBWAsw) {
val revertedBAM = revertBAM(bam, false)
val fastQ = swapExt(revertedBAM, ".bam", ".fq")
add(convertToFastQ(revertedBAM, fastQ),
bwa_sw(fastQ, realignedSamFile))
}
add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate))
addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam))
@ -226,7 +235,7 @@ class DataProcessingPipeline extends QScript {
if (nContigs < 0)
nContigs = QScriptUtils.getNumberOfContigs(bams(0))
val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams, false)}
val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)}
// generate a BAM file per sample joining all per lane files if necessary
val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs)
@ -427,9 +436,16 @@ class DataProcessingPipeline extends QScript {
this.output = outBam
this.input :+= inBam
this.removeAlignmentInformation = removeAlignmentInfo;
this.sortOrder = if (removeAlignmentInfo) {SortOrder.queryname} else {SortOrder.coordinate}
this.analysisName = queueLogDir + outBam + "revert"
this.jobName = queueLogDir + outBam + ".revert"
}
case class convertToFastQ (inBam: File, outFQ: File) extends SamToFastq with ExternalCommonArgs {
this.input :+= inBam
this.fastq = outFQ
this.analysisName = queueLogDir + outFQ + "convert_to_fastq"
this.jobName = queueLogDir + outFQ + ".convert_to_fastq"
}
case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs {
@ -469,6 +485,14 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".bwa_sam_pe"
}
case class bwa_sw (inFastQ: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs {
@Input(doc="fastq file to be aligned") var fq = inFastQ
@Output(doc="output bam file") var bam = outBam
def commandLine = bwaPath + " bwasw -t " + bwaThreads + " " + reference + " " + fq + " > " + bam
this.analysisName = queueLogDir + outBam + ".bwasw"
this.jobName = queueLogDir + outBam + ".bwasw"
}
case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction {
this.inputFiles = inBams
this.listFile = outBamList

View File

@ -50,8 +50,8 @@ trait PicardBamFunction extends JavaCommandLineFunction {
abstract override def commandLine = super.commandLine +
Array(
repeat(" INPUT=", inputBams),
" OUTPUT=" + outputBam,
" TMP_DIR=" + jobTempDir,
optional(" OUTPUT=", outputBam),
optional(" COMPRESSION_LEVEL=", compressionLevel),
optional(" VALIDATION_STRINGENCY=", validationStringency),
optional(" SO=", sortOrder),

View File

@ -0,0 +1,76 @@
package org.broadinstitute.sting.queue.extensions.picard
import org.broadinstitute.sting.commandline._
import java.io.File
/*
* Created by IntelliJ IDEA.
* User: carneiro
* Date: 6/22/11
* Time: 10:35 AM
*/
class SamToFastq extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction {
analysisName = "SamToFastq"
javaMainClass = "net.sf.picard.sam.SamToFastq"
@Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "Input SAM/BAM file to extract reads from.")
var input: List[File] = Nil
@Output(shortName = "fastq", fullName = "output_fastq_file", required = true, doc = "Output fastq file (single-end fastq or, if paired, first end of the pair fastq).")
var fastq: File = _
@Output(shortName = "se", fullName = "second_end_fastq", required = false, doc = "Output fastq file (if paired, second end of the pair fastq).")
var secondEndFastQ: File = _
@Argument(shortName = "opg", fullName = "output_per_readgroup", required = false, doc = "Output a fastq file per read group (two fastq files per read group if the group is paired).")
var outputPerReadGroup: Boolean = false
@Argument(shortName = "od", fullName = "output_dir", required = false, doc = "Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.")
var outputDir: File = _
@Argument(shortName = "rr", fullName = "re_reverse", required = false, doc = "Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq.")
var reReverse: Boolean = true
@Argument(shortName = "nonpf", fullName = "include_non_pf_reads", required = false, doc = "Include non-PF reads from the SAM file into the output FASTQ files.")
var includeNonPFReads: Boolean = false
@Argument(shortName = "cat", fullName = "clipping_attribute", required = false, doc = "The attribute that stores the position at which the SAM record should be clipped.")
var clippingAttribute: String = null
@Argument(shortName = "cac", fullName = "clipping_action", required = false, doc = "The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region.")
var clippingAction: String = null
@Argument(shortName = "r1t", fullName = "read_one_trim", required = false, doc = "The number of bases to trim from the beginning of read 1.")
var readOneTrim: Int = -1
@Argument(shortName = "r1mbtw", fullName = "read_one_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 1 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.")
var readOneMaxBasesToWrite: Int = -1
@Argument(shortName = "r2t", fullName = "read_two_trim", required = false, doc = "The number of bases to trim from the beginning of read 2.")
var readTwoTrim: Int = -1
@Argument(shortName = "r2mbtw", fullName = "read_two_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 2 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.")
var readTwoMaxBasesToWrite: Int = -1
@Argument(shortName = "inpa", fullName = "include_non_primary_alignments", required = false, doc = "If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments.")
var includeNonPrimaryAlignments: Boolean = false
override def inputBams = input
override def outputBam = null
override def commandLine = super.commandLine +
" FASTQ=" + fastq +
optional(" SECOND_END_FASTQ=", secondEndFastQ) +
conditionalParameter(outputPerReadGroup, optional(" OUTPUT_PER_RG=", outputPerReadGroup)) +
optional(" OUTPUT_DIR=", outputDir) +
conditionalParameter(!reReverse, optional(" RE_REVERSE=", reReverse)) +
conditionalParameter(includeNonPFReads, optional(" INCLUDE_NON_PF_READS=", includeNonPFReads)) +
optional(" CLIPPING_ATTRIBUTE=", clippingAttribute) +
optional(" CLIPPING_ACTION=", clippingAction) +
conditionalParameter (readOneTrim >= 0, optional(" READ1_TRIM=", readOneTrim)) +
conditionalParameter (readOneMaxBasesToWrite >= 0, optional(" READ1_MAX_BASES_TO_WRITE=", readOneMaxBasesToWrite)) +
conditionalParameter (readTwoTrim >= 0, optional(" READ2_TRIM=", readTwoTrim)) +
conditionalParameter (readTwoMaxBasesToWrite >=0, optional(" READ2_MAX_BASES_TO_WRITE=", readTwoMaxBasesToWrite)) +
conditionalParameter (includeNonPrimaryAlignments, optional(" INCLUDE_NON_PRIMARY_ALIGNMENTS=", includeNonPrimaryAlignments))
}