Fixing conflicts.
This commit is contained in:
commit
f19862a643
|
|
@ -4,13 +4,13 @@ import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import org.broadinstitute.sting.queue.QScript
|
import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||||
|
|
||||||
import scala.io.Source._
|
|
||||||
import collection.JavaConversions._
|
import collection.JavaConversions._
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||||
import org.broadinstitute.sting.queue.extensions.picard._
|
import org.broadinstitute.sting.queue.extensions.picard._
|
||||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
import net.sf.samtools.{SAMFileReader}
|
||||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||||
|
|
||||||
class DataProcessingPipeline extends QScript {
|
class DataProcessingPipeline extends QScript {
|
||||||
qscript =>
|
qscript =>
|
||||||
|
|
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
val ds: String)
|
val ds: String)
|
||||||
{}
|
{}
|
||||||
|
|
||||||
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
|
|
||||||
/**
 * Checks whether the given read groups refer to more than one sample.
 *
 * The sample names are compared as a set, so the answer does not depend on the
 * order of the read groups. The previous loop used the empty string as a
 * "nothing seen yet" marker, which made an empty sample name hide or reveal a
 * mismatch depending on its position, and failed on a null sample name.
 *
 * @param readGroups read group records taken from a BAM file header
 * @return true when at least two different sample names are present, false
 *         otherwise (including for an empty list)
 */
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
  // The java.util.List is viewed as a Scala collection through the
  // JavaConversions implicits that the enclosing file already imports.
  val sampleNames = readGroups.map(_.getSample).toSet
  sampleNames.size > 1
}
|
|
||||||
|
|
||||||
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
||||||
// It uses the sample information on the header of the input BAM files.
|
// It uses the sample information on the header of the input BAM files.
|
||||||
//
|
//
|
||||||
|
|
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {
|
||||||
|
|
||||||
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
||||||
// with PrintReads to separate the samples. Tell user to do it himself!
|
// with PrintReads to separate the samples. Tell user to do it himself!
|
||||||
assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
assert(!Utils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||||
|
|
||||||
// Fill out the sample table with the readgroups in this file
|
// Fill out the sample table with the readgroups in this file
|
||||||
for (rg <- readGroups) {
|
for (rg <- readGroups) {
|
||||||
|
|
@ -166,12 +154,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
return sampleBamFiles.toMap
|
return sampleBamFiles.toMap
|
||||||
}
|
}
|
||||||
|
|
||||||
// Checks how many contigs are in the dataset. Uses the BAM file header information.
|
|
||||||
/**
 * Counts the contigs listed in the sequence dictionary of a BAM file header.
 *
 * @param bamFile the BAM file whose header is inspected
 * @return the number of sequences in the header's sequence dictionary
 */
def getNumberOfContigs(bamFile: File): Int = {
  // bamFile is already a File, so it is handed to the reader directly instead
  // of being wrapped in another File.
  val samReader = new SAMFileReader(bamFile)
  try {
    samReader.getFileHeader.getSequenceDictionary.getSequences.size()
  } finally {
    // Release the file handle once the header has been read.
    samReader.close()
  }
}
|
|
||||||
|
|
||||||
// Rebuilds the Read Group string to give BWA
|
// Rebuilds the Read Group string to give BWA
|
||||||
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
|
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
|
||||||
val readGroups = samReader.getFileHeader.getReadGroups
|
val readGroups = samReader.getFileHeader.getReadGroups
|
||||||
|
|
@ -215,19 +197,6 @@ class DataProcessingPipeline extends QScript {
|
||||||
return realignedBams
|
return realignedBams
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reads a BAM LIST file and creates a scala list with all the files
|
|
||||||
/**
 * Turns the pipeline input into a list of files.
 *
 * A path ending in ".bam" is treated as a single BAM file and returned as a
 * one-element list. Any other file is read as a list with one path per line;
 * blank lines and lines whose first non-blank character is '#' are skipped.
 *
 * @param in either a single BAM file or a text file listing BAM paths
 * @return the files named by the input, in the order they appear
 */
def createListFromFile(in: File): List[File] = {
  // ".bam" (with the dot) is the same check Utils.createListFromFile performs.
  if (in.toString.endsWith(".bam")) {
    List(in)
  } else {
    val source = fromFile(in)
    try {
      // Trim before filtering so indented comments and whitespace-only lines
      // are dropped rather than turned into bogus File entries.
      source.getLines()
        .map(_.trim)
        .filter(line => !line.isEmpty && !line.startsWith("#"))
        .map(new File(_))
        .toList // materialise everything before the source is closed
    } finally {
      source.close()
    }
  }
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/****************************************************************************
|
/****************************************************************************
|
||||||
* Main script
|
* Main script
|
||||||
|
|
@ -237,8 +206,8 @@ class DataProcessingPipeline extends QScript {
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
// keep a record of the number of contigs in the first bam file in the list
|
// keep a record of the number of contigs in the first bam file in the list
|
||||||
val bams = createListFromFile(input)
|
val bams = Utils.createListFromFile(input)
|
||||||
nContigs = getNumberOfContigs(bams(0))
|
nContigs = Utils.getNumberOfContigs(bams(0))
|
||||||
|
|
||||||
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.queue.QScript
|
||||||
import org.broadinstitute.sting.queue.extensions.gatk._
|
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||||
import net.sf.samtools.SAMFileReader
|
import net.sf.samtools.SAMFileReader
|
||||||
import io.Source._
|
import io.Source._
|
||||||
|
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -33,25 +34,10 @@ class RecalibrateBaseQualities extends QScript {
|
||||||
val queueLogDir: String = ".qlog/"
|
val queueLogDir: String = ".qlog/"
|
||||||
var nContigs: Int = 0
|
var nContigs: Int = 0
|
||||||
|
|
||||||
/**
 * Returns how many contigs the header of the given BAM file declares.
 *
 * @param bamFile the BAM file whose header is read
 * @return the size of the sequence list in the header's sequence dictionary
 */
def getNumberOfContigs(bamFile: File): Int = {
  // The argument is already a File; no need to build a second File from it.
  val samReader = new SAMFileReader(bamFile)
  try {
    samReader.getFileHeader.getSequenceDictionary.getSequences.size()
  } finally {
    // Close the reader so the underlying file handle is not leaked.
    samReader.close()
  }
}
|
|
||||||
|
|
||||||
// Reads a BAM LIST file and creates a scala list with all the files
|
|
||||||
/**
 * Expands the script input into the list of BAM files to process.
 *
 * A path ending in ".bam" is returned as a single-element list. Anything else
 * is read as a text file with one path per line. Lines are trimmed, and blank
 * lines and '#' comment lines are ignored, matching the behaviour of
 * Utils.createListFromFile.
 *
 * @param in a BAM file or a text file listing BAM paths
 * @return the listed files in file order
 */
def createListFromFile(in: File): List[File] = {
  if (in.toString.endsWith(".bam")) {
    List(in)
  } else {
    val source = fromFile(in)
    try {
      source.getLines()
        .map(_.trim)
        .filter(line => !line.isEmpty && !line.startsWith("#"))
        .map(new File(_))
        .toList // force the read while the source is still open
    } finally {
      // The original never closed the source, leaking a file handle per call.
      source.close()
    }
  }
}
|
|
||||||
|
|
||||||
def script = {
|
def script = {
|
||||||
|
|
||||||
val bamList = createListFromFile(input)
|
val bamList = Utils.createListFromFile(input)
|
||||||
nContigs = getNumberOfContigs(bamList(0))
|
nContigs = Utils.getNumberOfContigs(bamList(0))
|
||||||
|
|
||||||
for (bam <- bamList) {
|
for (bam <- bamList) {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,60 @@
|
||||||
|
package org.broadinstitute.sting.queue.qscripts.utils
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import io.Source._
|
||||||
|
import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
|
||||||
|
|
||||||
|
import collection.JavaConversions._
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: carneiro
|
||||||
|
* Date: 7/14/11
|
||||||
|
* Time: 4:57 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
 * Helper functions shared by the Queue qscripts for reading BAM list files
 * and inspecting BAM headers.
 */
object Utils {

  /**
   * Takes a bam list file and produces a scala list with each file, allowing
   * the bam list to have empty lines and comment lines (lines starting with #).
   *
   * If the file provided ends with ".bam" it is not a bam list: it is treated
   * as a single file and returned as a one-element list.
   *
   * @param in a BAM file or a text file with one BAM path per line
   * @return the files named by the input, in the order they appear
   */
  def createListFromFile(in: File): List[File] = {
    if (in.toString.endsWith(".bam")) {
      List(in)
    } else {
      val source = fromFile(in)
      try {
        // Trim before filtering so indented comments and whitespace-only
        // lines are skipped instead of producing bogus File entries.
        source.getLines()
          .map(_.trim)
          .filter(line => !line.isEmpty && !line.startsWith("#"))
          .map(new File(_))
          .toList // materialise the result before the source is closed
      } finally {
        source.close()
      }
    }
  }

  /**
   * Returns the number of contigs in the BAM file header.
   *
   * @param bamFile the BAM file whose header is read
   * @return the number of sequences in the header's sequence dictionary
   */
  def getNumberOfContigs(bamFile: File): Int = {
    // bamFile is already a File; java.io.File has no constructor that takes a
    // single File, so it is passed to the reader as-is.
    val samReader = new SAMFileReader(bamFile)
    try {
      samReader.getFileHeader.getSequenceDictionary.getSequences.size()
    } finally {
      // Release the file handle once the header has been inspected.
      samReader.close()
    }
  }

  /**
   * Check if there are multiple samples in a BAM file.
   *
   * Sample names are compared as a set, so the result does not depend on the
   * order of the read groups and a missing (null) sample name does not throw.
   *
   * @param readGroups read group records from a BAM file header
   * @return true when at least two different sample names are present
   */
  def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
    // The java.util.List is iterated through the JavaConversions implicits
    // imported at the top of this file.
    val sampleNames = readGroups.map(_.getSample).toSet
    sampleNames.size > 1
  }

}
|
||||||
Loading…
Reference in New Issue