diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 7262de863..5e5fd2365 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -65,6 +65,9 @@ class DataProcessingPipeline extends QScript { @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) var useBWApe: Boolean = false + @Input(doc="Decompose input BAM file and fully realign it using BWA SW", fullName="use_bwa_sw", shortName="bwasw", required=false) + var useBWAsw: Boolean = false + @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) var bwaThreads: Int = 1 @@ -162,22 +165,28 @@ class DataProcessingPipeline extends QScript { var index = 1 for (bam <- bams) { // first revert the BAM file to the original qualities - val revertedBAM = revertBAM(bam, true) - val readSortedBam = swapExt(revertedBAM, ".bam", "." + index + ".sorted.bam" ) val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") val realignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.bam") val rgRealignedBamFile = swapExt(bam, ".bam", "." + index + ".realigned.rg.bam") + if (useBWAse) { + val revertedBAM = revertBAM(bam, true) add(bwa_aln_se(revertedBAM, saiFile1), bwa_sam_se(revertedBAM, saiFile1, realignedSamFile)) } - else { - add(sortSam(revertedBAM, readSortedBam, SortOrder.queryname), - bwa_aln_pe(readSortedBam, saiFile1, 1), - bwa_aln_pe(readSortedBam, saiFile2, 2), - bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile)) + else if (useBWApe) { + val revertedBAM = revertBAM(bam, true) + add(bwa_aln_pe(revertedBAM, saiFile1, 1), + bwa_aln_pe(revertedBAM, saiFile2, 2), + bwa_sam_pe(revertedBAM, saiFile1, saiFile2, realignedSamFile)) + } + else if (useBWAsw) { + val revertedBAM = revertBAM(bam, false) + val fastQ = swapExt(revertedBAM, ".bam", ".fq") + add(convertToFastQ(revertedBAM, fastQ), + bwa_sw(fastQ, realignedSamFile)) } add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate)) addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam)) @@ -226,7 +235,7 @@ class DataProcessingPipeline extends QScript { if (nContigs < 0) nContigs = QScriptUtils.getNumberOfContigs(bams(0)) - val realignedBAMs = if (useBWApe || useBWAse) {performAlignment(bams)} else {revertBams(bams, false)} + val realignedBAMs = if (useBWApe || useBWAse || useBWAsw) {performAlignment(bams)} else {revertBams(bams, false)} // generate a BAM file per sample joining all per lane files if necessary val sampleBAMFiles: Map[String, List[File]] = createSampleFiles(bams, realignedBAMs) @@ -427,9 +436,16 @@ class DataProcessingPipeline extends QScript { this.output = outBam this.input :+= inBam this.removeAlignmentInformation = removeAlignmentInfo; + this.sortOrder = if (removeAlignmentInfo) {SortOrder.queryname} else {SortOrder.coordinate} this.analysisName = queueLogDir + outBam + "revert" this.jobName = queueLogDir + outBam + ".revert" + } + case class convertToFastQ (inBam: File, outFQ: File) extends SamToFastq with ExternalCommonArgs { + this.input :+= inBam + this.fastq = outFQ + this.analysisName = queueLogDir + outFQ + "convert_to_fastq" + this.jobName = queueLogDir + outFQ + ".convert_to_fastq" } case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with ExternalCommonArgs { @@ -469,6 +485,14 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".bwa_sam_pe" } + case class bwa_sw (inFastQ: File, outBam: File) extends CommandLineFunction with ExternalCommonArgs { + @Input(doc="fastq file to be aligned") var fq = inFastQ + @Output(doc="output bam file") var bam = outBam + def commandLine = bwaPath + " bwasw -t " + bwaThreads + " " + reference + " " + fq + " > " + bam + this.analysisName = queueLogDir + outBam + ".bwasw" + this.jobName = queueLogDir + outBam + ".bwasw" + } + case class writeList(inBams: List[File], outBamList: File) extends ListWriterFunction { this.inputFiles = inBams this.listFile = outBamList diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala index 2654e4a3d..427c09f82 100644 --- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardBamFunction.scala @@ -50,8 +50,8 @@ trait PicardBamFunction extends JavaCommandLineFunction { abstract override def commandLine = super.commandLine + Array( repeat(" INPUT=", inputBams), - " OUTPUT=" + outputBam, " TMP_DIR=" + jobTempDir, + optional(" OUTPUT=", outputBam), optional(" COMPRESSION_LEVEL=", compressionLevel), optional(" VALIDATION_STRINGENCY=", validationStringency), optional(" SO=", sortOrder), diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala new file mode 100644 index 000000000..2f9f84c63 --- /dev/null +++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/SamToFastq.scala @@ -0,0 +1,76 @@ +package org.broadinstitute.sting.queue.extensions.picard + +import org.broadinstitute.sting.commandline._ + +import java.io.File + +/* + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 6/22/11 + * Time: 10:35 AM + */ +class SamToFastq extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardBamFunction { + analysisName = "SamToFastq" + javaMainClass = "net.sf.picard.sam.SamToFastq" + + @Input(shortName = "input", fullName = "input_bam_files", required = true, doc = "Input SAM/BAM file to extract reads from.") + var input: List[File] = Nil + + @Output(shortName = "fastq", fullName = "output_fastq_file", required = true, doc = "Output fastq file (single-end fastq or, if paired, first end of the pair fastq).") + var fastq: File = _ + + @Output(shortName = "se", fullName = "second_end_fastq", required = false, doc = "Output fastq file (if paired, second end of the pair fastq).") + var secondEndFastQ: File = _ + + @Argument(shortName = "opg", fullName = "output_per_readgroup", required = false, doc = "Output a fastq file per read group (two fastq files per read group if the group is paired).") + var outputPerReadGroup: Boolean = false + + @Argument(shortName = "od", fullName = "output_dir", required = false, doc = "Directory in which to output the fastq file(s). Used only when OUTPUT_PER_RG is true.") + var outputDir: File = _ + + @Argument(shortName = "rr", fullName = "re_reverse", required = false, doc = "Re-reverse bases and qualities of reads with negative strand flag set before writing them to fastq.") + var reReverse: Boolean = true + + @Argument(shortName = "nonpf", fullName = "include_non_pf_reads", required = false, doc = "Include non-PF reads from the SAM file into the output FASTQ files.") + var includeNonPFReads: Boolean = false + + @Argument(shortName = "cat", fullName = "clipping_attribute", required = false, doc = "The attribute that stores the position at which the SAM record should be clipped.") + var clippingAttribute: String = null + + @Argument(shortName = "cac", fullName = "clipping_action", required = false, doc = "The action that should be taken with clipped reads: 'X' means the reads and qualities should be trimmed at the clipped position; 'N' means the bases should be changed to Ns in the clipped region; and any integer means that the base qualities should be set to that value in the clipped region.") + var clippingAction: String = null + + @Argument(shortName = "r1t", fullName = "read_one_trim", required = false, doc = "The number of bases to trim from the beginning of read 1.") + var readOneTrim: Int = -1 + + @Argument(shortName = "r1mbtw", fullName = "read_one_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 1 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.") + var readOneMaxBasesToWrite: Int = -1 + + @Argument(shortName = "r2t", fullName = "read_two_trim", required = false, doc = "The number of bases to trim from the beginning of read 2.") + var readTwoTrim: Int = -1 + + @Argument(shortName = "r2mbtw", fullName = "read_two_max_bases_to_write", required = false, doc = "The maximum number of bases to write from read 2 after trimming. If there are fewer than this many bases left after trimming, all will be written. If this value is null then all bases left after trimming will be written.") + var readTwoMaxBasesToWrite: Int = -1 + + @Argument(shortName = "inpa", fullName = "include_non_primary_alignments", required = false, doc = "If true, include non-primary alignments in the output. Support of non-primary alignments in SamToFastq is not comprehensive, so there may be exceptions if this is set to true and there are paired reads with non-primary alignments.") + var includeNonPrimaryAlignments: Boolean = false + + override def inputBams = input + override def outputBam = null + + override def commandLine = super.commandLine + + " FASTQ=" + fastq + + optional(" SECOND_END_FASTQ=", secondEndFastQ) + + conditionalParameter(outputPerReadGroup, optional(" OUTPUT_PER_RG=", outputPerReadGroup)) + + optional(" OUTPUT_DIR=", outputDir) + + conditionalParameter(!reReverse, optional(" RE_REVERSE=", reReverse)) + + conditionalParameter(includeNonPFReads, optional(" INCLUDE_NON_PF_READS=", includeNonPFReads)) + + optional(" CLIPPING_ATTRIBUTE=", clippingAttribute) + + optional(" CLIPPING_ACTION=", clippingAction) + + conditionalParameter (readOneTrim >= 0, optional(" READ1_TRIM=", readOneTrim)) + + conditionalParameter (readOneMaxBasesToWrite >= 0, optional(" READ1_MAX_BASES_TO_WRITE=", readOneMaxBasesToWrite)) + + conditionalParameter (readTwoTrim >= 0, optional(" READ2_TRIM=", readTwoTrim)) + + conditionalParameter (readTwoMaxBasesToWrite >=0, optional(" READ2_MAX_BASES_TO_WRITE=", readTwoMaxBasesToWrite)) + + conditionalParameter (includeNonPrimaryAlignments, optional(" INCLUDE_NON_PRIMARY_ALIGNMENTS=", includeNonPrimaryAlignments)) +} \ No newline at end of file