Cryptic implementation of base-string entropy. I suspect this scales ~linearly with length, so I may choose to normalize in the future.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4861 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
chartl 2010-12-16 22:25:05 +00:00
parent 3a6d1dbcef
commit fd1d817d45
1 changed file with 13 additions and 1 deletion

View File

@ -99,9 +99,21 @@ class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) {
finalized = true
def isGC(b : Byte) : Int = if ( BaseUtils.gIndex == b || BaseUtils.cIndex == b ) { 1 } else { 0 }
gcContent = baseContent.foldLeft[Int](0)( (a,b) => a + isGC(b)).asInstanceOf[Double]/location.size()
entropy = 0.0 // todo -- implement me
entropy = calcEntropy(baseContent.map(b => ListBuffer(b))) + calcEntropy(baseContent.reverse.map(b => ListBuffer(b)))
val meta : String = metaData.reduceLeft(_ + "\t" + _)
return "%s\t%d\t%d\t%.2f\t%.2f\t%s".format(location.getContig,location.getStart,location.getStop,gcContent,entropy,meta)
}
/**
 * Computes an entropy-like score for a sequence of byte groups by repeatedly
 * collapsing the sequence until at most one group remains.
 *
 * On every pass over the current groups:
 *  - the "pivot" is the second group;
 *  - the pass contributes log(1 + k) to the score, where k is the number of
 *    consecutive groups, starting at the second one, that equal the pivot;
 *  - every group after the first that equals the pivot is concatenated onto
 *    the group that precedes it in the rebuilt sequence (so the pivot itself
 *    is always folded into the first group, shrinking the sequence by at
 *    least one group per pass, which guarantees termination).
 *
 * The input buffer and its inner buffers are never modified; merged groups
 * are freshly allocated.
 *
 * NOTE(review): this is not Shannon entropy; the score appears to grow with
 * input length and is not normalized -- confirm the intended interpretation
 * with the caller.
 *
 * @param byteList ordered groups of bases; the visible caller passes one
 *                 single-element group per base
 * @return the accumulated score; 0.0 when there are zero or one groups
 */
def calcEntropy(byteList : ListBuffer[ListBuffer[Byte]]) : Double = {
  // Tail-recursive accumulator form: the original recursed once per collapsed
  // group, so stack depth grew with the interval length.
  @scala.annotation.tailrec
  def collapse(groups : ListBuffer[ListBuffer[Byte]], total : Double) : Double = {
    if ( groups.size <= 1 ) {
      // Also covers the empty sequence, which previously threw on tail / apply(1).
      total
    } else {
      val rest = groups.tail
      val pivot = rest.head
      // Length of the run of pivot-equal groups at the front of `rest` (always >= 1).
      val leadingRun = rest.takeWhile( _ == pivot ).size
      // Seed with the first group; `acc` is a fresh buffer, so appending in place is safe.
      val merged = rest.foldLeft(ListBuffer(groups.head))( (acc, cur) => {
        if ( cur == pivot ) {
          // Replace the last group with (last ++ cur); `++` allocates a new inner buffer.
          acc.dropRight(1) += (acc.last ++ cur)
        } else {
          acc += cur
        }
      })
      collapse(merged, total + math.log(1 + leadingRun))
    }
  }
  collapse(byteList, 0.0)
}
}