From 64048a67e890be2c479a597daa6d48f6890af671 Mon Sep 17 00:00:00 2001 From: Mauricio Carneiro Date: Thu, 30 Jun 2011 15:20:43 -0400 Subject: [PATCH] cleaning up ghost scala scripts. Deleting clearly unused one and moving others to qscripts.archive --- .../scala/BaseTransitionTableCalculator.scala | 211 ------------------ .../scala/IntervalAnnotationWalker.scala | 120 ---------- .../sting/scala/ScalaCountLoci.scala | 24 -- 3 files changed, 355 deletions(-) delete mode 100755 public/scala/src/org/broadinstitute/sting/scala/BaseTransitionTableCalculator.scala delete mode 100755 public/scala/src/org/broadinstitute/sting/scala/IntervalAnnotationWalker.scala delete mode 100755 public/scala/src/org/broadinstitute/sting/scala/ScalaCountLoci.scala diff --git a/public/scala/src/org/broadinstitute/sting/scala/BaseTransitionTableCalculator.scala b/public/scala/src/org/broadinstitute/sting/scala/BaseTransitionTableCalculator.scala deleted file mode 100755 index 7fe249beb..000000000 --- a/public/scala/src/org/broadinstitute/sting/scala/BaseTransitionTableCalculator.scala +++ /dev/null @@ -1,211 +0,0 @@ -package org.broadinstitute.sting.scala -/** -import gatk.walkers.genotyper.{UnifiedGenotyper, GenotypeCall} -import java.io.File -import net.sf.samtools.SAMRecord -import org.broadinstitute.sting.gatk.walkers.LocusWalker -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker -import org.broadinstitute.sting.gatk.contexts.ReferenceContext -import org.broadinstitute.sting.gatk.contexts.AlignmentContext -import org.broadinstitute.sting.utils.Pair -import org.broadinstitute.sting.utils.genotype.GenotypeMetaData -import utils._ -import cmdLine.Argument - -class TransitionTable() { - var table: Array[Array[Double]] = new Array[Array[Double]](4,4) - - def makeKey(b: Char): Int = { - return BaseUtils.simpleBaseToBaseIndex(b) - } - - def fromKey(i: Int): Char = { - return BaseUtils.baseIndexToSimpleBase(i) - } - - def add(ref: Char, subst: Char) = { - table(makeKey(ref))(makeKey(subst)) += 1 - } - - def get(i: Int, j: Int): Double = { - return table(i)(j) - } - - def set(i: Int, j: Int, d: Double) = { - //printf("Setting %d / %d => %f%n", i, j, d) - table(i)(j) = d - } - - def mapEntries[A](f: ((Int, Int, Double) => A)): List[A] = { - return for { i <- List.range(0, 4); j <- List.range(0,4) } yield f(i, j, get(i,j)) - } - - def header(): String = { - //return Utils.join("\t", mapEntries((i,j,x) => "P(%c|true=%c)" format (fromKey(j), fromKey(i))).toArray) - return Utils.join("\t", mapEntries((i,j,x) => "[%c|%c]" format (fromKey(j), fromKey(i))).toArray) - } - - override def toString(): String = { - return Utils.join("\t", mapEntries((i,j,x) => "%.2f" format x).toArray) - } - - def sum(): Double = { - return sumList(table.toList map (x => sumList(x.toList))) - } - - def sumList(xs: List[Double]): Double = { - return (0.0 :: xs) reduceLeft {(x, y) => x + y} - } - - def getRateMatrix(): TransitionTable = { - var sums = (table.toList map (x => sumList(x.toList))).toArray - var t = new TransitionTable() - - //println(sums.toList) - this mapEntries ((i,j,x) => t.set(i, j, x / sums(i))) - - return t - } -} - -**/class BaseTransitionTableCalculator /**{ // extends LocusWalker[Unit,Int] { - private var MIN_MAPPING_QUALITY = 30 - private var MIN_BASE_QUALITY = 20 - private var MIN_LOD = 5 - private var PRINT_FREQ = 1000000 - private var CARE_ABOUT_STRAND = true - - private val SSG = new UnifiedGenotyper() - private val table1 = new TransitionTable() - private val tableFWD = new TransitionTable() - private val tableREV = new TransitionTable() - private val table2 = new TransitionTable() - - @Argument() { val shortName = "v", val doc = "Print out verbose output", val required = false } - var VERBOSE: Boolean = false - - override def initialize() { - SSG.initialize(); - } - - override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Unit = { - val callPair = SSG.map(tracker, ref, context) - //val call = callPair.getFirst().get(0) - val pileup = new ReadBackedPileup(ref.getBase(), context) - - def hasNoNs(): Boolean = { - return ! (pileup.getBases() exists ('N' ==)) - } - - if ( isDefinitelyHomRef(callPair) && hasNoNs() ) { - var (refBases, nonRefBases) = splitBases(ref.getBase, pileup.getBases) - - //printf("nonRefBases %s%n", nonRefBases) - if ( nonRefBases.length > 0 ) { - var (nonRefReads, offsets) = getNonRefReads(ref.getBase, pileup) - var nonRefRead: SAMRecord = nonRefReads.head - - nonRefBases.length match { - case 1 => - //if ( goodRead(nonRefRead,offsets.head) ) { - //printf("Including site:%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head) - //} - addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1) - addRead(nonRefRead, offsets.head, nonRefBases.head, ref, if ( nonRefRead.getReadNegativeStrandFlag ) tableREV else tableFWD ) - case 2 => - addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table2) - addRead(nonRefReads.tail.head, offsets.tail.head, nonRefBases.tail.head, ref, table2) - case x => - } - - if ( VERBOSE && pileup.size > 30 && nonRefReads.length < 4 ) - printNonRefBases(ref, nonRefReads, offsets, context.getReads.size()) - } - } - } - - implicit def listToJavaList[T](l: List[T]) = java.util.Arrays.asList(l.toArray: _*) - - def printNonRefBases(ref: ReferenceContext, nonRefReads: List[SAMRecord], offsets: List[Integer], depth: Integer): Unit = { - out.println(new ReadBackedPileup(ref.getLocus(), ref.getBase(), listToJavaList(nonRefReads), offsets) + " " + depth) - } - - def addRead(nonRefRead: SAMRecord, offset: Integer, nonRefBase: Char, ref: ReferenceContext, table: TransitionTable): Unit = { - if ( goodRead(nonRefRead, offset) ) { - //printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", nonRefBases, refBases, nonRefRead.format, offsets.head) - //println(call.getReadDepth, call.getReferencebase, nonRefBases, refBases, nonRefRead.getMappingQuality) - if ( CARE_ABOUT_STRAND && nonRefRead.getReadNegativeStrandFlag() ) { - // it's on the reverse strand - val refComp: Char = BaseUtils.simpleComplement(ref.getBase) - val nonRefComp: Char = BaseUtils.simpleComplement(nonRefBase) - - //printf("Negative: %c/%c is actually %c/%c%n", ref.getBase, nonRefBases.head, refComp, nonRefComp) - table.add(refComp, nonRefComp) - } else { - table.add(ref.getBase, nonRefBase) - } - } - } - - def getNonRefReads(ref: Char, pileup: ReadBackedPileup): (List[SAMRecord], List[Integer]) = { - def getBase(i: Int): Char = { - var offset = pileup.getOffsets().get(i) - return pileup.getReads().get(i).getReadString().charAt(offset.asInstanceOf[Int]) - } - - var subset = for { i <- List.range(0, pileup.size) if getBase(i) != ref } yield i - var reads = subset map (i => pileup.getReads().get(i)) - var offsets = subset map (i => pileup.getOffsets().get(i)) - - //println(reads, offsets, subset) - return (reads, offsets) - } - - - def hasFewNonRefBases(refBase: Char, pileup: String): List[Char] = { - splitBases(refBase, pileup) match { - case (refs, nonRefs) => - //printf("nrf=%s, rb=%s%n", nonRefs.mkString, refs.mkString) - return nonRefs - } - } - - def goodRead( read: SAMRecord, offset: Integer ): Boolean = { - return read.getMappingQuality() > MIN_MAPPING_QUALITY && read.getBaseQualities()(offset.intValue) > MIN_BASE_QUALITY - } - - def splitBases(refBase: Char, pileup: String): (List[Char], List[Char]) = { - val refBases = pileup.toList filter (refBase ==) - val nonRefBases = pileup.toList filter (refBase !=) - return (refBases, nonRefBases) - } - - def isDefinitelyHomRef(call: Pair[java.util.List[GenotypeCall],GenotypeMetaData]): Boolean = { - if ( call == null ) - return false; - - return (! call.getFirst().get(0).isVariant()) && call.getFirst().get(0).getNegLog10PError() > MIN_LOD - } - - def reduceInit(): Int = { - return 0; - } - - def reduce(m: Unit, r: Int): Int = { - return r; - } - - override def onTraversalDone(r: Int) = { - out.println("-------------------- FINAL RESULT --------------------") - out.println("type\t" + table1.header) - def print1(t: String, x: TransitionTable) = { - out.println(t + "\t" + x) - } - - print1("1-mismatch", table1) - print1("2-mismatch", table2) - print1("1-mismatch-fwd", tableFWD) - print1("1-mismatch-rev", tableREV) - } -} -*/ \ No newline at end of file diff --git a/public/scala/src/org/broadinstitute/sting/scala/IntervalAnnotationWalker.scala b/public/scala/src/org/broadinstitute/sting/scala/IntervalAnnotationWalker.scala deleted file mode 100755 index fa30fceff..000000000 --- a/public/scala/src/org/broadinstitute/sting/scala/IntervalAnnotationWalker.scala +++ /dev/null @@ -1,120 +0,0 @@ -package org.broadinstitute.sting.scala - -import java.io.PrintStream -import org.broadinstitute.sting.gatk.contexts.{ReferenceContext, AlignmentContext} -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker -import collection.JavaConversions._ -import collection.immutable.List -import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature -import org.broadinstitute.sting.gatk.refdata.features.table.TableFeature -import org.broadinstitute.sting.gatk.refdata.features.refseq.RefSeqFeature -import org.broadinstitute.sting.gatk.walkers.{TreeReducible, RefWalker} -import org.broadinstitute.sting.commandline.{Output, Argument} -import org.broadinstitute.sting.utils.{BaseUtils, GenomeLoc} -import collection.mutable.{ListBuffer, HashSet} -import java.lang.Math - -class IntervalAnnotationWalker extends RefWalker[AnnotationMetaData,List[IntervalInfoBuilder]] { - @Argument(doc="Min proportion of bases overlapping between an interval of interest and an annotation interval for annotation to occur",shortName="mpb") - var minPropOverlap : Double = 0.50 // default to 50% - @Output var out : PrintStream = _ - - override def reduceInit : List[IntervalInfoBuilder] = { - Nil - } - - override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext) : AnnotationMetaData = { - val ivalsOfInterest : List[GATKFeature] = tracker.getGATKFeatureMetaData("interval_list",false).toList - val refSeqData : List[Object] = tracker.getReferenceMetaData("refseq").toList - val tableMetaData : List[Object] = tracker.getReferenceMetaData("table",false).toList - var amd : AnnotationMetaData = new AnnotationMetaData(ref.getLocus,ref.getBase) - if ( refSeqData != null ) { - amd.refSeqFeatures = refSeqData.map( (o: Object) => o.asInstanceOf[RefSeqFeature]) - } - if ( tableMetaData != null ) { - amd.tableFeatures = tableMetaData.map( (o: Object) => o.asInstanceOf[TableFeature]).map( (u: TableFeature) => - new Tuple2(u.getLocation,u.getAllValues.toList.reduceLeft(_ + "," + _))) - } - amd.newIvals = ivalsOfInterest.map(_.getLocation) - return amd - } - - override def reduce(mapMetaData : AnnotationMetaData, rList : List[IntervalInfoBuilder]) : List[IntervalInfoBuilder] = { - rList.filter( (u : IntervalInfoBuilder) => u.location.isBefore(mapMetaData.locus)).foreach(u => out.print("%s%n".format(u.done))) - val newList : List[IntervalInfoBuilder] = rList.filter( ! _.finalized ) ::: - mapMetaData.newIvals.filter( (g: GenomeLoc) => g.getStart == mapMetaData.locus.getStart ).map( u => new IntervalInfoBuilder(u,minPropOverlap) ) - newList.foreach(_.update(mapMetaData)) - return newList - } - - override def onTraversalDone(finalList : List[IntervalInfoBuilder]) = { - finalList.foreach(u => out.print("%s%n".format(u.done))) - } - -} - -class AnnotationMetaData(loc: GenomeLoc, ref: Byte) { - val locus : GenomeLoc = loc - var newIvals : List[GenomeLoc] = Nil - var refSeqFeatures : List[RefSeqFeature] = Nil - var tableFeatures : List[(GenomeLoc,String)] = Nil - val refBase : Byte = ref -} - -class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) { - - val OVERLAP_PROP : Double = minProp - - var metaData : HashSet[String] = new HashSet[String] - var seenTranscripts : HashSet[String] = new HashSet[String] - var baseContent : ListBuffer[Byte] = new ListBuffer[Byte] - var location : GenomeLoc = loc - var gcContent : Double = _ - var entropy : Double = _ - var finalized : Boolean = false - - - def update(mmd : AnnotationMetaData) = { - baseContent += mmd.refBase - mmd.refSeqFeatures.filter( p => ! seenTranscripts.contains(p.getTranscriptUniqueGeneName)).view.foreach(u => addRefSeq(u)) - mmd.tableFeatures.filter( (t : (GenomeLoc,String)) => - ! metaData.contains(t._2) && (t._1.intersect(location).size.asInstanceOf[Double]/location.size() >= OVERLAP_PROP) ).foreach( t => metaData.add(t._2)) - } - - def addRefSeq( rs: RefSeqFeature ) : Unit = { - seenTranscripts.add(rs.getTranscriptUniqueGeneName) - val exons : List[(GenomeLoc,Int)] = rs.getExons.toList.zipWithIndex.filter( p => ((p._1.intersect(location).size+0.0)/location.size) >= minProp ) - if ( exons.size > 0 ) { - metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,exons.map( p => "exon%d".format(p._2) ).reduceLeft(_ + "," + _))) - } else { - if ( location.isBefore(rs.getExons.get(0)) || location.isPast(rs.getExons.get(rs.getNumExons-1)) ) { - metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,"UTR")) - } else { - metaData.add("%s[%s]".format(rs.getTranscriptUniqueGeneName,"Intron")) - } - } - - } - - def done : String = { - finalized = true - def isGC(b : Byte) : Int = if ( BaseUtils.gIndex == b || BaseUtils.cIndex == b ) { 1 } else { 0 } - gcContent = baseContent.foldLeft[Int](0)( (a,b) => a + isGC(b)).asInstanceOf[Double]/location.size() - entropy = calcEntropy(baseContent.map(b => ListBuffer(b))) + calcEntropy(baseContent.reverse.map(b => ListBuffer(b))) - val meta : String = metaData.reduceLeft(_ + "\t" + _) - return "%s\t%d\t%d\t%.2f\t%.2f\t%s".format(location.getContig,location.getStart,location.getStop,gcContent,entropy,meta) - } - - def calcEntropy(byteList : ListBuffer[ListBuffer[Byte]]) : Double = { - if(byteList.size == 1) return 0 - Math.log(1+byteList.tail.size-byteList.tail.dropWhile( u => u.equals(byteList(1))).size) + - calcEntropy(byteList.tail.foldLeft(ListBuffer(byteList(0)))( (a,b) => { - if ( b.equals(byteList(1)) ) { - a.dropRight(1) :+ (a.last ++ b) - } else { - a :+ b - } - })) - } - -} \ No newline at end of file diff --git a/public/scala/src/org/broadinstitute/sting/scala/ScalaCountLoci.scala b/public/scala/src/org/broadinstitute/sting/scala/ScalaCountLoci.scala deleted file mode 100755 index 658aabc18..000000000 --- a/public/scala/src/org/broadinstitute/sting/scala/ScalaCountLoci.scala +++ /dev/null @@ -1,24 +0,0 @@ -package org.broadinstitute.sting.scala - -import org.broadinstitute.sting.gatk.walkers.LocusWalker -import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker -import org.broadinstitute.sting.gatk.contexts.ReferenceContext -import org.broadinstitute.sting.gatk.contexts.AlignmentContext - -class ScalaCountLoci extends LocusWalker[Int,Int] { - override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Int = { - return 1 - } - - def reduceInit(): Int = { - return 0; - } - - def reduce(m: Int, r: Int): Int = { - return m + r; - } - - def main(args: Array[String]) { - println("Hello, world!") - } -}