Cryptic implementation of base-string entropy. I suspect this scales ~linearly with length, so I may choose to normalize in the future.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4861 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
3a6d1dbcef
commit
fd1d817d45
|
|
@ -99,9 +99,21 @@ class IntervalInfoBuilder(loc : GenomeLoc, minProp : Double) {
|
|||
finalized = true
|
||||
def isGC(b : Byte) : Int = if ( BaseUtils.gIndex == b || BaseUtils.cIndex == b ) { 1 } else { 0 }
|
||||
gcContent = baseContent.foldLeft[Int](0)( (a,b) => a + isGC(b)).asInstanceOf[Double]/location.size()
|
||||
entropy = 0.0 // todo -- implement me
|
||||
entropy = calcEntropy(baseContent.map(b => ListBuffer(b))) + calcEntropy(baseContent.reverse.map(b => ListBuffer(b)))
|
||||
val meta : String = metaData.reduceLeft(_ + "\t" + _)
|
||||
return "%s\t%d\t%d\t%.2f\t%.2f\t%s".format(location.getContig,location.getStart,location.getStop,gcContent,entropy,meta)
|
||||
}
|
||||
|
||||
/**
 * Heuristic "entropy" score for a sequence of byte tokens, computed by repeated merging.
 *
 * Each pass looks at the second token (the pivot), adds `log(1 + k)` to the score, where
 * `k` is the number of consecutive pivot-equal tokens starting at position 1, and then
 * glues EVERY pivot-equal token after the first position onto the token that precedes it.
 * Every pass removes at least one token (the pivot itself), so the process ends when a
 * single token remains.
 *
 * The input buffer and the token buffers it holds are never modified.
 *
 * @param byteList the tokens to score; callers in this file pass one single-byte buffer per base
 * @return the summed log terms; 0.0 when there are fewer than two tokens (an empty list
 *         previously threw instead of returning)
 */
def calcEntropy(byteList : ListBuffer[ListBuffer[Byte]]) : Double = {
  // Tail-recursive so the number of merge passes (up to size - 1) cannot exhaust the stack.
  // Terms are accumulated in pass order; this matches the original up to floating-point rounding.
  @scala.annotation.tailrec
  def loop(tokens : ListBuffer[ListBuffer[Byte]], total : Double) : Double = {
    if ( tokens.size < 2 ) {
      total
    } else {
      val pivot = tokens(1)
      val rest = tokens.tail
      // rest starts with the pivot itself, so runLength is always >= 1
      val runLength = rest.takeWhile(_ == pivot).size
      // Single pass: `current` is the token still being grown, `done` holds finished tokens.
      // `++` builds a fresh buffer, so tokens owned by the caller are left untouched.
      val (done, current) = rest.foldLeft((new ListBuffer[ListBuffer[Byte]], tokens.head)) {
        case ((finished, growing), token) =>
          if ( token == pivot ) {
            (finished, growing ++ token)
          } else {
            (finished += growing, token)
          }
      }
      done += current
      loop(done, total + Math.log(1 + runLength))
    }
  }
  loop(byteList, 0.0)
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue