From 58c7b27ccc1469202f3a6642308eed370dbcdf9e Mon Sep 17 00:00:00 2001 From: kshakir Date: Tue, 26 Apr 2011 00:12:41 +0000 Subject: [PATCH] Missing file from last checkin. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5688 348d0f76-0448-11de-a6fe-93d51630548a --- .../queue/extensions/gatk/GATKIntervals.scala | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100755 scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala diff --git a/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala new file mode 100755 index 000000000..577781de9 --- /dev/null +++ b/scala/src/org/broadinstitute/sting/queue/extensions/gatk/GATKIntervals.scala @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */
+
+package org.broadinstitute.sting.queue.extensions.gatk
+
+import java.io.File
+import collection.JavaConversions._
+import org.broadinstitute.sting.utils.interval.IntervalUtils
+import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource
+import net.sf.samtools.SAMFileHeader
+import java.util.Collections
+import org.broadinstitute.sting.utils.GenomeLocParser
+
+/**
+ * Couples a reference file with a list of interval arguments and lazily
+ * derives values from the pair: a SAM header holding the reference's
+ * sequence dictionary, the parsed and sorted locations, the contigs those
+ * locations name, and memoised results of IntervalUtils.splitFixedIntervals.
+ *
+ * @param reference file passed to ReferenceDataSource on first use
+ * @param intervals interval arguments passed to IntervalUtils.parseIntervalArguments
+ */
+case class GATKIntervals(reference: File, intervals: List[String]) {
+  // Built on first access only; the header and the loc parser both read from it.
+  private lazy val referenceDataSource = new ReferenceDataSource(reference)
+
+  // Results of getSplits keyed by the requested size. The map itself is
+  // immutable; the var is re-pointed at an extended copy on every cache miss.
+  // No locking guards that update.
+  private var splitsBySize = Map.empty[Int, java.util.List[java.lang.Integer]]
+
+  /** A new SAMFileHeader whose sequence dictionary is taken from the reference. */
+  lazy val samFileHeader: SAMFileHeader = {
+    val result = new SAMFileHeader
+    val dictionary = referenceDataSource.getReference.getSequenceDictionary
+    result.setSequenceDictionary(dictionary)
+    result
+  }
+
+  /**
+   * The interval arguments parsed against the reference, then sorted in place
+   * with java.util.Collections.sort.
+   */
+  lazy val locs = {
+    // NOTE(review): the meaning of the trailing `false` flag is defined by
+    // IntervalUtils.parseIntervalArguments -- confirm against that method.
+    val sorted = IntervalUtils.parseIntervalArguments(
+      new GenomeLocParser(referenceDataSource.getReference), intervals, false)
+    Collections.sort(sorted)
+    sorted
+  }
+
+  /** The contig of every entry in locs, duplicates removed, first occurrence kept. */
+  lazy val contigs = locs.map(loc => loc.getContig).distinct
+
+  /**
+   * Returns IntervalUtils.splitFixedIntervals(locs, size), computing it at most
+   * once per distinct size on a single thread and answering later calls from
+   * the cache.
+   *
+   * @param size value forwarded as the second argument of splitFixedIntervals
+   * @return the cached or freshly computed list for this size
+   */
+  def getSplits(size: Int): java.util.List[java.lang.Integer] =
+    splitsBySize.get(size) match {
+      case Some(cached) => cached
+      case None =>
+        val computed: java.util.List[java.lang.Integer] = IntervalUtils.splitFixedIntervals(locs, size)
+        // Remember the result before handing it back.
+        splitsBySize += size -> computed
+        computed
+    }
+}