BWA requires pair ended reads to be sorted by read names when operating over BAM files, but Picard sorts by coordinate, so in case we use BWA in pair ended reads, the pipeline now resorts the BAM in read name order, realigns it then sorts it in coordinate order.

This commit is contained in:
Mauricio Carneiro 2011-06-30 14:29:21 -04:00
parent bb6b1d615b
commit f4463d38ca
1 changed files with 10 additions and 7 deletions

View File

@ -4,12 +4,12 @@ import org.broadinstitute.sting.queue.extensions.gatk._
import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.function.ListWriterFunction import org.broadinstitute.sting.queue.function.ListWriterFunction
import net.sf.samtools.{SAMFileReader,SAMReadGroupRecord}
import scala.io.Source._ import scala.io.Source._
import collection.JavaConversions._ import collection.JavaConversions._
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
import org.broadinstitute.sting.queue.extensions.picard._ import org.broadinstitute.sting.queue.extensions.picard._
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
import net.sf.samtools.SAMFileHeader.SortOrder
class DataProcessingPipeline extends QScript { class DataProcessingPipeline extends QScript {
@ -180,6 +180,7 @@ class DataProcessingPipeline extends QScript {
var realignedBams: List[File] = List() var realignedBams: List[File] = List()
var index = 1 var index = 1
for (bam <- bams) { for (bam <- bams) {
val readSortedBam = swapExt(bam, ".bam", "." + index + ".sorted.bam" )
val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai") val saiFile1 = swapExt(bam, ".bam", "." + index + ".1.sai")
val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai") val saiFile2 = swapExt(bam, ".bam", "." + index + ".2.sai")
val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam") val realignedSamFile = swapExt(bam, ".bam", "." + index + ".realigned.sam")
@ -190,11 +191,12 @@ class DataProcessingPipeline extends QScript {
bwa_sam_se(bam, saiFile1, realignedSamFile)) bwa_sam_se(bam, saiFile1, realignedSamFile))
} }
else { else {
add(bwa_aln_pe(bam, saiFile1, 1), add(sortSam(bam, readSortedBam, SortOrder.queryname),
bwa_aln_pe(bam, saiFile2, 2), bwa_aln_pe(readSortedBam, saiFile1, 1),
bwa_sam_pe(bam, saiFile1, saiFile2, realignedSamFile)) bwa_aln_pe(readSortedBam, saiFile2, 2),
bwa_sam_pe(readSortedBam, saiFile1, saiFile2, realignedSamFile))
} }
add(sortSam(realignedSamFile, realignedBamFile)) add(sortSam(realignedSamFile, realignedBamFile, SortOrder.coordinate))
addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam)) addReadGroups(realignedBamFile, rgRealignedBamFile, new SAMFileReader(bam))
realignedBams :+= rgRealignedBamFile realignedBams :+= rgRealignedBamFile
index = index + 1 index = index + 1
@ -385,9 +387,10 @@ class DataProcessingPipeline extends QScript {
this.jobName = queueLogDir + outBam + ".joinBams" this.jobName = queueLogDir + outBam + ".joinBams"
} }
case class sortSam (inSam: File, outBam: File) extends SortSam { case class sortSam (inSam: File, outBam: File, sortOrderP: SortOrder) extends SortSam {
this.input = List(inSam) this.input = List(inSam)
this.output = outBam this.output = outBam
this.sortOrder = sortOrderP
this.memoryLimit = 4 this.memoryLimit = 4
this.isIntermediate = true this.isIntermediate = true
this.analysisName = queueLogDir + outBam + ".sortSam" this.analysisName = queueLogDir + outBam + ".sortSam"