diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala index d6caabd23..b6fe59f6d 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/DataProcessingPipeline.scala @@ -4,13 +4,13 @@ import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.function.ListWriterFunction -import scala.io.Source._ import collection.JavaConversions._ import org.broadinstitute.sting.gatk.walkers.indels.IndelRealigner.ConsensusDeterminationModel import org.broadinstitute.sting.queue.extensions.picard._ -import net.sf.samtools.{SAMFileReader, SAMReadGroupRecord} +import net.sf.samtools.{SAMFileReader} import net.sf.samtools.SAMFileHeader.SortOrder +import org.broadinstitute.sting.queue.qscripts.utils.Utils class DataProcessingPipeline extends QScript { qscript => @@ -103,18 +103,6 @@ class DataProcessingPipeline extends QScript { val ds: String) {} - // Utility function to check if there are multiple samples in a BAM file (currently we can't deal with that) - def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = { - var sample: String = "" - for (r <- readGroups) { - if (sample.isEmpty) - sample = r.getSample - else if (sample != r.getSample) - return true; - } - return false - } - // Utility function to merge all bam files of similar samples. Generates one BAM file per sample. // It uses the sample information on the header of the input BAM files. // @@ -135,7 +123,7 @@ class DataProcessingPipeline extends QScript { // only allow one sample per file. Bam files with multiple samples would require pre-processing of the file // with PrintReads to separate the samples. Tell user to do it himself! - assert(!hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam) + assert(!Utils.hasMultipleSamples(readGroups), "The pipeline requires that only one sample is present in a BAM file. Please separate the samples in " + bam) // Fill out the sample table with the readgroups in this file for (rg <- readGroups) { @@ -166,12 +154,6 @@ class DataProcessingPipeline extends QScript { return sampleBamFiles.toMap } - // Checks how many contigs are in the dataset. Uses the BAM file header information. - def getNumberOfContigs(bamFile: File): Int = { - val samReader = new SAMFileReader(new File(bamFile)) - return samReader.getFileHeader.getSequenceDictionary.getSequences.size() - } - // Rebuilds the Read Group string to give BWA def addReadGroups(inBam: File, outBam: File, samReader: SAMFileReader) { val readGroups = samReader.getFileHeader.getReadGroups @@ -215,19 +197,6 @@ class DataProcessingPipeline extends QScript { return realignedBams } - // Reads a BAM LIST file and creates a scala list with all the files - def createListFromFile(in: File):List[File] = { - if (in.toString.endsWith("bam")) - return List(in) - var l: List[File] = List() - for (bam <- fromFile(in).getLines) { - if (!bam.startsWith("#") && !bam.isEmpty) - l :+= new File(bam.trim) - } - return l - } - - /**************************************************************************** * Main script @@ -237,8 +206,8 @@ class DataProcessingPipeline extends QScript { def script = { // keep a record of the number of contigs in the first bam file in the list - val bams = createListFromFile(input) - nContigs = getNumberOfContigs(bams(0)) + val bams = Utils.createListFromFile(input) + nContigs = Utils.getNumberOfContigs(bams(0)) val realignedBams = if (useBWApe || useBWAse) {performAlignment(bams)} else {bams} diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala index 88c7f5ff7..b2960f150 100755 --- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/RecalibrateBaseQualities.scala @@ -4,6 +4,7 @@ import org.broadinstitute.sting.queue.QScript import org.broadinstitute.sting.queue.extensions.gatk._ import net.sf.samtools.SAMFileReader import io.Source._ +import org.broadinstitute.sting.queue.qscripts.utils.Utils /** * Created by IntelliJ IDEA. @@ -33,25 +34,10 @@ class RecalibrateBaseQualities extends QScript { val queueLogDir: String = ".qlog/" var nContigs: Int = 0 - def getNumberOfContigs(bamFile: File): Int = { - val samReader = new SAMFileReader(new File(bamFile)) - return samReader.getFileHeader.getSequenceDictionary.getSequences.size() - } - - // Reads a BAM LIST file and creates a scala list with all the files - def createListFromFile(in: File):List[File] = { - if (in.toString.endsWith("bam")) - return List(in) - var l: List[File] = List() - for (bam <- fromFile(in).getLines) - l :+= new File(bam) - return l - } - def script = { - val bamList = createListFromFile(input) - nContigs = getNumberOfContigs(bamList(0)) + val bamList = Utils.createListFromFile(input) + nContigs = Utils.getNumberOfContigs(bamList(0)) for (bam <- bamList) { diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/utils/Utils.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/utils/Utils.scala new file mode 100644 index 000000000..1189b376c --- /dev/null +++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/utils/Utils.scala @@ -0,0 +1,60 @@ +package org.broadinstitute.sting.queue.qscripts.utils + +import java.io.File +import io.Source._ +import net.sf.samtools.{SAMReadGroupRecord, SAMFileReader} + +import collection.JavaConversions._ + + +/** + * Created by IntelliJ IDEA. + * User: carneiro + * Date: 7/14/11 + * Time: 4:57 PM + * To change this template use File | Settings | File Templates. + */ + +object Utils { + + /** + * Takes a bam list file and produces a scala list with each file allowing the bam list + * to have empty lines and comment lines (lines starting with #). + */ + def createListFromFile(in: File):List[File] = { + // If the file provided ends with .bam, it is not a bam list, we treat it as a single file. + // and return a list with only this file. + if (in.toString.endsWith(".bam")) + return List(in) + + var list: List[File] = List() + for (bam <- fromFile(in).getLines) + if (!bam.startsWith("#") && !bam.isEmpty ) + list :+= new File(bam.trim()) + list + } + + /** + * Returns the number of contigs in the BAM file header. + */ + def getNumberOfContigs(bamFile: File): Int = { + val samReader = new SAMFileReader(new File(bamFile)) + samReader.getFileHeader.getSequenceDictionary.getSequences.size() + } + + /** + * Check if there are multiple samples in a BAM file + */ + def hasMultipleSamples(readGroups: java.util.List[SAMReadGroupRecord]): Boolean = { + var sample: String = "" + for (r <- readGroups) { + if (sample.isEmpty) + sample = r.getSample + else if (sample != r.getSample) + return true; + } + false + } + + +} \ No newline at end of file