diff --git a/scala/qscript/oneoffs/fromer/PhaseSamples.scala b/scala/qscript/oneoffs/fromer/PhaseSamples.scala index 100789ce2..40d34e855 100644 --- a/scala/qscript/oneoffs/fromer/PhaseSamples.scala +++ b/scala/qscript/oneoffs/fromer/PhaseSamples.scala @@ -1,11 +1,8 @@ -import org.broadinstitute.sting.queue.extensions.picard.PicardBamJarFunction +package oneoffs.fromer + import org.broadinstitute.sting.queue.extensions.gatk._ -import org.broadinstitute.sting.queue.extensions.samtools.SamtoolsIndexFunction import org.broadinstitute.sting.queue.QScript -import org.apache.commons.io.FilenameUtils -import tools.nsc.io.Process; -import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils.VariantMergeType -import scala.io.Source._ +import org.broadinstitute.sting.queue.util.BAMutilities class PhaseSamples extends QScript { qscript => @@ -55,7 +52,7 @@ class PhaseSamples extends QScript { def script = { if (qscript.scatterCount > 0) throw new RuntimeException("scatter/gather currently not implemented") - val samples: List[String] = getSamples() + val samples: List[String] = BAMutilities.getSamplesFromVCF(masterCalls) Console.out.printf("Samples are %s%n", samples) val targets: List[Target] = bamsToTargets(samples, bams) @@ -71,67 +68,20 @@ class PhaseSamples extends QScript { } def bamsToTargets(samples: List[String], bamsIn: File): List[Target] = { - val bams: List[File] = parseBamsInput(bamsIn) - val sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = getMapOfBamsForSample(bams) + val bams: List[File] = BAMutilities.parseBamsInput(bamsIn) + val sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = BAMutilities.getMapOfBamsForSample(bams) def buildTargets(samples: List[String], count: Int): List[Target] = (samples splitAt samplesPerJob) match { case (Nil, y) => return Nil case (subsamples, remaining) => - return new Target("group" + count, subsamples, findBamsForSamples(subsamples, sampleToBams)) :: + return new Target("group" + count, subsamples, BAMutilities.findBamsForSamples(subsamples, sampleToBams)) :: buildTargets(remaining, count + 1) } return buildTargets(samples, 0) } - def getMapOfBamsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = bams match { - case Nil => return scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]] - - case x :: y => - val m: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = getMapOfBamsForSample(y) - - val getBamSamplesCommand: String = "samtools view -H " + x.getPath() + " | grep '^@RG' | awk '{for (i = 1; i <= NF; i++) if (substr($i,1,3) == \"SM:\") print substr($i,4)}' | sort | uniq" - //println("getBamSamplesCommand: " + getBamSamplesCommand) - val bamSamples: List[String] = Process(getBamSamplesCommand).iterator toList - - for (s <- bamSamples) { - if (!m.contains(s)) - m += s -> scala.collection.mutable.Set.empty[File] - - m(s) = m(s) + x - } - - return m - } - - def findBamsForSamples(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): List[File] = { - val l: List[File] = Nil - l ++ findBamsForSamplesHelper(samples, sampleToBams) - } - - def findBamsForSamplesHelper(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): scala.collection.mutable.Set[File] = samples match { - case Nil => scala.collection.mutable.Set.empty[File] - - case x :: y => - var bamsForSampleX: scala.collection.mutable.Set[File] = scala.collection.mutable.Set.empty[File] - if (sampleToBams.contains(x)) - bamsForSampleX = sampleToBams(x) - return bamsForSampleX ++ findBamsForSamplesHelper(y, sampleToBams) - } - - def getSamples(): List[String] = { - val getSamplesCommand: String = "cat " + masterCalls.getPath() + " | grep '^#CHROM' | head -1 | awk '{for (i = 10; i <= NF; i++) print $i}'" - //println("getSamplesCommand: " + getSamplesCommand) - return Process(getSamplesCommand).iterator toList - } - - def parseBamsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match { - case "bam" => return List(bamsIn) - case "list" => return (for (line <- fromFile(bamsIn).getLines) yield new File(line)).toList - case _ => throw new RuntimeException("Unexpected BAM input type: " + bamsIn + "; only permitted extensions are .bam and .list") - } - class PhaseSamples(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.ReadBackedPhasing with CommandLineGATKArgs { this.rodBind :+= RodBind("variant", "VCF", qscript.masterCalls) this.out = t.phasedVCFFile diff --git a/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala b/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala index 1b6b81eec..07ec44b33 100644 --- a/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala +++ b/scala/qscript/oneoffs/fromer/ReadDepthCNVanalysis.scala @@ -2,8 +2,8 @@ package oneoffs.fromer import org.broadinstitute.sting.queue.extensions.gatk._ import org.broadinstitute.sting.queue.QScript - import org.broadinstitute.sting.gatk.DownsampleType +import org.broadinstitute.sting.queue.util.BAMutilities class ReadDepthCNVanalysis extends QScript { @@ -24,6 +24,9 @@ class ReadDepthCNVanalysis extends QScript { @Input(doc = "level of parallelism for BAM DoC. By default is set to 0 [no scattering].", shortName = "scatter", required = false) var scatterCountInput = 0 + @Input(doc = "Samples to phase together. By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false) + var samplesPerJob = 1 + @Output(doc = "DoC file to output", shortName = "o", required = true) var outputDoC: File = _ @@ -44,11 +47,40 @@ class ReadDepthCNVanalysis extends QScript { this.logging_level = "INFO" } - def script = { - add(new DepthOfCoverage(bams, outputDoC)) + // A target has a list of samples and bam files to use for DoC + class Target(val name: String, val samples: List[String], val bams: List[File]) { + def DoC_output = new File(name + "." + outputDoC) + + override def toString(): String = String.format("[Target %s with samples %s against bams %s]", name, samples, bams) } - class DepthOfCoverage(bam: File, docOutPath: File) extends org.broadinstitute.sting.queue.extensions.gatk.DepthOfCoverage with CommandLineGATKArgs { + def script = { + val sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = BAMutilities.getMapOfBamsForSample(BAMutilities.parseBamsInput(bams)) + val samples: List[String] = sampleToBams.keys.toList + Console.out.printf("Samples are %s%n", samples) + + val targets: List[Target] = buildTargets(samples, sampleToBams) + + for (target <- targets) { + Console.out.printf("Target is %s%n", target) + add(new DoC(target)) + } + } + + def buildTargets(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): List[Target] = { + + def buildTargetsHelper(samples: List[String], count: Int): List[Target] = (samples splitAt samplesPerJob) match { + case (Nil, y) => + return Nil + case (subsamples, remaining) => + return new Target("group" + count, subsamples, BAMutilities.findBamsForSamples(subsamples, sampleToBams)) :: + buildTargetsHelper(remaining, count + 1) + } + + return buildTargetsHelper(samples, 0) + } + + class DoC(t: Target) extends org.broadinstitute.sting.queue.extensions.gatk.DepthOfCoverage with CommandLineGATKArgs { this.omitIntervalStatistics = false this.omitDepthOutputAtEachBase = true this.omitLocusTable = true @@ -56,8 +88,8 @@ class ReadDepthCNVanalysis extends QScript { this.minBaseQuality = Some(0) this.minMappingQuality = Some(0) - this.out = docOutPath - this.input_file :+= bam + this.out = t.DoC_output + this.input_file = t.bams this.dcov = Some(MAX_DEPTH) this.downsampling_type = Some(DownsampleType.BY_SAMPLE) @@ -68,7 +100,7 @@ class ReadDepthCNVanalysis extends QScript { this.scatterCount = scatterCountInput - override def dotString = "DOC: " + bam.getName + override def dotString = "DOC: " + t.DoC_output } } \ No newline at end of file diff --git a/scala/src/org/broadinstitute/sting/queue/util/BAMutilities.scala b/scala/src/org/broadinstitute/sting/queue/util/BAMutilities.scala new file mode 100644 index 000000000..7a5b9237b --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/util/BAMutilities.scala @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.queue.util + +import tools.nsc.io.Process +import java.io.File +import org.apache.commons.io.FilenameUtils +import scala.io.Source._ + +object BAMutilities { + + def getSamplesFromVCF(calls: File): List[String] = { + val getSamplesCommand: String = "cat " + calls.getPath() + " | grep '^#CHROM' | head -1 | awk '{for (i = 10; i <= NF; i++) print $i}'" + //println("getSamplesCommand: " + getSamplesCommand) + return Process(getSamplesCommand).iterator toList + } + + def parseBamsInput(bamsIn: File): List[File] = FilenameUtils.getExtension(bamsIn.getPath) match { + case "bam" => return List(bamsIn) + case "list" => return (for (line <- fromFile(bamsIn).getLines) yield new File(line)).toList + case _ => throw new RuntimeException("Unexpected BAM input type: " + bamsIn + "; only permitted extensions are .bam and .list") + } + + def getMapOfBamsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = bams match { + case Nil => return scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]] + + case x :: y => + val m: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = getMapOfBamsForSample(y) + + val getBamSamplesCommand: String = "samtools view -H " + x.getPath() + " | grep '^@RG' | awk '{for (i = 1; i <= NF; i++) if (substr($i,1,3) == \"SM:\") print substr($i,4)}' | sort | uniq" + //println("getBamSamplesCommand: " + getBamSamplesCommand) + val bamSamples: List[String] = Process(getBamSamplesCommand).iterator toList + + for (s <- bamSamples) { + if (!m.contains(s)) + m += s -> scala.collection.mutable.Set.empty[File] + + m(s) = m(s) + x + } + + return m + } + + def findBamsForSamples(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): List[File] = { + val l: List[File] = Nil + l ++ findBamsForSamplesHelper(samples, sampleToBams) + } + + def findBamsForSamplesHelper(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): scala.collection.mutable.Set[File] = samples match { + case Nil => scala.collection.mutable.Set.empty[File] + + case x :: y => + var bamsForSampleX: scala.collection.mutable.Set[File] = scala.collection.mutable.Set.empty[File] + if (sampleToBams.contains(x)) + bamsForSampleX = sampleToBams(x) + return bamsForSampleX ++ findBamsForSamplesHelper(y, sampleToBams) + } +} \ No newline at end of file