From 4d251fb91f48d18656f4a790452b970d24543d67 Mon Sep 17 00:00:00 2001 From: kshakir Date: Wed, 4 May 2011 19:13:39 +0000 Subject: [PATCH] Why won't you die? git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5758 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/queue/util/PipelineUtils.scala | 101 ------------------ 1 file changed, 101 deletions(-) delete mode 100755 scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala diff --git a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala b/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala deleted file mode 100755 index 22059df99..000000000 --- a/scala/src/org/broadinstitute/sting/queue/util/PipelineUtils.scala +++ /dev/null @@ -1,101 +0,0 @@ -package org.broadinstitute.sting.queue.util - -import net.sf.picard.reference.ReferenceSequenceFileFactory -import java.io.File -import org.broadinstitute.sting.utils.GenomeLocParser -import collection.JavaConversions._ -import org.broadinstitute.sting.utils.interval.IntervalUtils -import org.broadinstitute.sting.queue.pipeline.PipelineArgumentCollection -import org.broadinstitute.sting.utils.yaml.YamlUtils -import org.broadinstitute.sting.datasources.pipeline.{PipelineSample, PipelineProject, Pipeline} -import org.broadinstitute.sting.utils.text.XReadLines - -class PipelineUtils { - -} - -object PipelineUtils{ - - def smartSplitContigs(reference: File, intervals: File, sets: Int) : List[List[String]] = { - var genomeLocParser: GenomeLocParser = new GenomeLocParser(ReferenceSequenceFileFactory.getReferenceSequenceFile(reference)) - val targets = IntervalUtils.parseIntervalArguments(genomeLocParser,List(intervals.getAbsolutePath), false) - - // Build up a map of contigs with sizes. - var contigSizes = Map.empty[String, Long] - // todo -- make this look like functional code for Q's sake - //targets.foreach( loc => { contigSizes += loc -> { contigSizes.get(loc.getContig) match { case Some(size) => size + loc.size case None => loc.size } } }) - - for (loc <- targets) { - val contig = loc.getContig - val contigSize = loc.size - contigSizes += contig -> { - contigSizes.get(contig) match { - case Some(size) => size + contigSize - case None => contigSize - } - } - } - - // Keep a list of pairs of sizes with lists of contigs. - var splitContigs = List.empty[(Long, List[String])] - for ((contigName, contigSize) <- contigSizes) { - if (splitContigs.size < sets) { - // If there are fewer than the requested number of sets, just add this contig. - splitContigs :+= contigSize -> List(contigName) - } else { - // If there is already a number of sets - // sort the contigs to get the smallest one first. - splitContigs = splitContigs.sortBy{case (size, contigs) => size} - // Update the pair with the new contig size and name. - var smallContigs = splitContigs.head - smallContigs = (smallContigs._1 + contigSize) -> (smallContigs._2 :+ contigName) - // Re add the pair to the list. - splitContigs = smallContigs :: splitContigs.tail - } - } - - splitContigs.map{case (size, contigs) => contigs} - } - - def loadPipelineFromPAC(args: PipelineArgumentCollection) : Pipeline = { - if ( args.yamlFile != null ) { - return YamlUtils.load(classOf[Pipeline], args.yamlFile) - } else { - return loadPipelineFromSpec(args.projectName,args.projectRef,args.projectIntervals,args.projectDBSNP,args.projectBams) - } - } - - def loadPipelineFromSpec(name: String, ref: File, ivals: File, dbsnp: File, pBamList: File) : Pipeline = { - var newPipeline : Pipeline = new Pipeline - var pipeProject : PipelineProject = new PipelineProject - var pipeSamples : List[PipelineSample] = ((new XReadLines(pBamList)).readLines).toList.map( bamSpecToSample ) - - pipeProject.setName(name) - pipeProject.setReferenceFile(ref) - pipeProject.setIntervalList(ivals) - pipeProject.setGenotypeDbsnp(dbsnp) - - newPipeline.setProject(pipeProject) - newPipeline.setSamples(pipeSamples) - - return newPipeline - } - - //todo -- find a better name for this function - def bamSpecToSample(spec: String) : PipelineSample = { - var sam : PipelineSample = new PipelineSample - var spStr : Array[String] = spec.split("\\s") - sam.setId(spStr(0)) - var tagStr : Array[String] = spStr(1).split(",") - var tagMap : java.util.HashMap[String,String] = new java.util.HashMap[String,String](tagStr.size) - tagStr.filter( u => ! u.equals("")).foreach( u => tagMap.put(u.split(":")(0),u.split(":")(1)) ) - sam.setTags(tagMap) - var bamStr : Array[String] = spStr(2).split(",") - var bamMap : java.util.HashMap[String,File] = new java.util.HashMap[String,File](bamStr.size) - bamStr.foreach( u => bamMap.put( u.split(":")(0), new File(u.split(":")(1) ) ) ) - sam.setBamFiles(bamMap) - - return sam - - } -} \ No newline at end of file