From f4463d38ca28209ad59b3c5cb7579c3f104fab5c Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 30 Jun 2011 14:29:21 -0400 Subject: [PATCH 1/3] BWA requires pair ended reads to be sorted by read names when operating over BAM files, but Picard sorts by coordinate, so in case we use BWA in pair ended reads, the pipeline now resorts the BAM in read name order, realigns it then sorts it in coordinate order. --- .../queue/qscripts/DataProcessingPipeline.scala | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index fdde5e8a2..623181de6 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -4,12 +4,12 @@ import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.function.ListWriterFunction -import net.sf.samtools.{SAMFileReader,SAMReadGroupRecord} - import scala.io.Source._ import collection.JavaConversions._ import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.queue.extensions.picard._ +import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord} +import net.sf.samtools.SAMFileHeader.SortOrder class DataProcessingPipeline extends QScript { @@ -180,6 +180,7 @@ class DataProcessingPipeline extends QScript { var realignedBams: List[File] = List() var index = 1 for (bam <- bams) { + val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" ) val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") @@ -190,11 +191,12 @@ class DataProcessingPipeline extends QScript { bwa_sam_se(bam, saiFile1, realignedSamFile)) } else { - add(bwa_aln_pe(bam, saiFile1, 1), - bwa_aln_pe(bam, saiFile2, 2), - bwa_sam_pe(bam, saiFile1, saiFile2, realignedSamFile)) + add(sortSam(bam, readSortedBam, SortOrder.queryname), + bwa_aln_pe(readSortedBam, saiFile1, 1), + bwa_aln_pe(readSortedBam, saiFile2, 2), + bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile)) } - add(sortSam(realignedSamFile, realignedBamFile)) + add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate)) addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam)) realignedBams :+= rgRealignedBamFile index = index + 1 @@ -385,9 +387,10 @@ class DataProcessingPipeline extends QScript { this.jobName = queueLogDir + outBam + ".joinBams" } - case class sortSam (inSam: File, outBam: File) extends SortSam { + case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam { this.input = List(inSam) this.output = outBam + this.sortOrder = sortOrderP this.memoryLimit = 4 this.isIntermediate = true this.analysisName = queueLogDir + outBam + ".sortSam" From 197b7141c198b40b23c058414379fdba2fb47450 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 30 Jun 2011 14:41:57 -0400 Subject: [PATCH 2/3] Added an optional argument -bt for BWA to run multithreaded. --- .../sting/queue/qscripts/DataProcessingPipeline.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index 623181de6..f9369ee3f 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -69,6 +69,8 @@ class DataProcessingPipeline extends QScript { @Input(doc="Decompose input BAM file and fully realign it using BWA and assume Pair Ended reads", fullName="use_bwa_pair_ended", shortName="bwape", required=false) var useBWApe: Boolean = false + @Input(doc="Number of threads BWA should use", fullName="bwa_threads", shortName="bt", required=false) + var bwaThreads: Int = 1 /**************************************************************************** @@ -433,7 +435,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_se (inBam: File, outSai: File) extends CommandLineFunction with BWACommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file") var sai = outSai - def commandLine = bwaPath + " aln -q 5 " + reference + " -b " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b " + bam + " > " + sai this.analysisName = queueLogDir + outSai + ".bwa_aln_se" this.jobName = queueLogDir + outSai + ".bwa_aln_se" } @@ -441,7 +443,7 @@ class DataProcessingPipeline extends QScript { case class bwa_aln_pe (inBam: File, outSai1: File, index: Int) extends CommandLineFunction with BWACommonArgs { @Input(doc="bam file to be aligned") var bam = inBam @Output(doc="output sai file for 1st mating pair") var sai = outSai1 - def commandLine = bwaPath + " aln -q 5 " + reference + " -b" + index + " " + bam + " > " + sai + def commandLine = bwaPath + " aln -t " + bwaThreads + " -q 5 " + reference + " -b" + index + " " + bam + " > " + sai this.analysisName = queueLogDir + outSai1 + ".bwa_aln_pe1" this.jobName = queueLogDir + outSai1 + ".bwa_aln_pe1" } From 2cb1376ed0acb2fea84d5d38a6c524e603ed269b Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 30 Jun 2011 14:55:39 -0400 Subject: [PATCH 3/3] VCFStreaming was failing integration tests because now select variants outputs the samples in alphabetical order, instead of random as before. Fixed the MD5. --- .../gatk/walkers/variantutils/VCFStreamingIntegrationTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java index 15733e104..4ed9718fd 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/VCFStreamingIntegrationTest.java @@ -60,7 +60,7 @@ public class VCFStreamingIntegrationTest extends WalkerTest { " --NO_HEADER" + " -o %s", 1, - Arrays.asList("2cae3d16f9ed00b07d87e9c49272d877") + Arrays.asList("995c07ccd593fe1c35d0d28155112a55") ); executeTest("testSimpleVCFStreaming", spec);