Added a qscripts util package with some utility functions commonly shared across queue scripts. Refactored some of my public scripts to use it in an effort to make queue scripts more reusable and "supportable".
This commit is contained in:
parent
9f5180ab05
commit
09ffe277ae
|
|
@ -4,13 +4,13 @@ import org.broadinstitute.sting.queue.extensions.gatk._
|
|||
import org.broadinstitute.sting.queue.QScript
|
||||
import org.broadinstitute.sting.queue.function.ListWriterFunction
|
||||
|
||||
import scala.io.Source._
|
||||
import collection.JavaConversions._
|
||||
import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel
|
||||
import org.broadinstitute.sting.queue.extensions.picard._
|
||||
import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord}
|
||||
import net.sf.samtools.{SAMFileReader}
|
||||
import net.sf.samtools.SAMFileHeader.SortOrder
|
||||
|
||||
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||
|
||||
class DataProcessingPipeline extends QScript {
|
||||
qscript =>
|
||||
|
|
@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript {
|
|||
val ds: String)
|
||||
{}
|
||||
|
||||
// Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that)
|
||||
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
|
||||
var sample: String = ""
|
||||
for (r <- readGroups) {
|
||||
if (sample.isEmpty)
|
||||
sample = r.getSample
|
||||
else if (sample != r.getSample)
|
||||
return true;
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// Utility function to merge all bam files of similar samples. Generates one BAM file per sample.
|
||||
// It uses the sample information on the header of the input BAM files.
|
||||
//
|
||||
|
|
@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript {
|
|||
|
||||
// only allow one sample per file. Bam files with multiple samples would require pre-processing of the file
|
||||
// with PrintReads to separate the samples. Tell user to do it himself!
|
||||
assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||
assert(!Utils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam)
|
||||
|
||||
// Fill out the sample table with the readgroups in this file
|
||||
for (rg <- readGroups) {
|
||||
|
|
@ -157,12 +145,6 @@ class DataProcessingPipeline extends QScript {
|
|||
return sampleBamFiles.toMap
|
||||
}
|
||||
|
||||
// Checks how many contigs are in the dataset. Uses the BAM file header information.
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
// Rebuilds the Read Group string to give BWA
|
||||
def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) {
|
||||
val readGroups = samReader.getFileHeader.getReadGroups
|
||||
|
|
@ -206,15 +188,7 @@ class DataProcessingPipeline extends QScript {
|
|||
return realignedBams
|
||||
}
|
||||
|
||||
// Reads a BAM LIST file and creates a scala list with all the files
|
||||
def createListFromFile(in: File):List[File] = {
|
||||
if (in.toString.endsWith("bam"))
|
||||
return List(in)
|
||||
var l: List[File] = List()
|
||||
for (bam <- fromFile(in).getLines)
|
||||
l :+= new File(bam)
|
||||
return l
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
@ -226,8 +200,8 @@ class DataProcessingPipeline extends QScript {
|
|||
def script = {
|
||||
|
||||
// keep a record of the number of contigs in the first bam file in the list
|
||||
val bams = createListFromFile(input)
|
||||
nContigs = getNumberOfContigs(bams(0))
|
||||
val bams = Utils.createListFromFile(input)
|
||||
nContigs = Utils.getNumberOfContigs(bams(0))
|
||||
|
||||
val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broadinstitute.sting.queue.QScript
|
|||
import org.broadinstitute.sting.queue.extensions.gatk._
|
||||
import net.sf.samtools.SAMFileReader
|
||||
import io.Source._
|
||||
import org.broadinstitute.sting.queue.qscripts.utils.Utils
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -33,25 +34,10 @@ class RecalibrateBaseQualities extends QScript {
|
|||
val queueLogDir: String = ".qlog/"
|
||||
var nContigs: Int = 0
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
return samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
// Reads a BAM LIST file and creates a scala list with all the files
|
||||
def createListFromFile(in: File):List[File] = {
|
||||
if (in.toString.endsWith("bam"))
|
||||
return List(in)
|
||||
var l: List[File] = List()
|
||||
for (bam <- fromFile(in).getLines)
|
||||
l :+= new File(bam)
|
||||
return l
|
||||
}
|
||||
|
||||
def script = {
|
||||
|
||||
val bamList = createListFromFile(input)
|
||||
nContigs = getNumberOfContigs(bamList(0))
|
||||
val bamList = Utils.createListFromFile(input)
|
||||
nContigs = Utils.getNumberOfContigs(bamList(0))
|
||||
|
||||
for (bam <- bamList) {
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,62 @@
|
|||
package org.broadinstitute.sting.queue.qscripts.utils
|
||||
|
||||
import java.io.File
|
||||
import io.Source._
|
||||
import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader}
|
||||
|
||||
import collection.JavaConversions._
|
||||
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: carneiro
|
||||
* Date: 7/14/11
|
||||
* Time: 4:57 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
object Utils {
|
||||
|
||||
/**
|
||||
* Takes a bam list file and produces a scala list with each file allowing the bam list
|
||||
* to have empty lines and comment lines (lines starting with #).
|
||||
*/
|
||||
def createListFromFile(in: File):List[File] = {
|
||||
// If the file provided ends with .bam, it is not a bam list, we treat it as a single file.
|
||||
// and return a list with only this file.
|
||||
if (in.toString.endsWith(".bam"))
|
||||
return List(in)
|
||||
|
||||
var list: List[File] = List()
|
||||
for (bam <- fromFile(in).getLines)
|
||||
if (!bam.startsWith("#") && !bam.isEmpty )
|
||||
list :+= new File(bam.trim())
|
||||
list
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of contigs in the BAM file header.
|
||||
*/
|
||||
|
||||
def getNumberOfContigs(bamFile: File): Int = {
|
||||
val samReader = new SAMFileReader(new File(bamFile))
|
||||
samReader.getFileHeader.getSequenceDictionary.getSequences.size()
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if there are multiple samples in a BAM file
|
||||
*/
|
||||
|
||||
def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = {
|
||||
var sample: String = ""
|
||||
for (r <- readGroups) {
|
||||
if (sample.isEmpty)
|
||||
sample = r.getSample
|
||||
else if (sample != r.getSample)
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue