support for scala walkers
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1468 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
7cf9a54b64
commit
ab9458d06d
49
build.xml
49
build.xml
|
|
@ -175,6 +175,55 @@
|
|||
</javac>
|
||||
</target>
|
||||
|
||||
|
||||
<property name="scala.home" value="/home/radon01/depristo/work/local/scala-2.7.5.final"/>
|
||||
<property
|
||||
name="build.dir"
|
||||
value="${base.dir}/build"
|
||||
/>
|
||||
<target name="init.scala">
|
||||
<property
|
||||
name="scala-library.jar"
|
||||
value="${scala.home}/lib/scala-library.jar"
|
||||
/>
|
||||
<path id="build.classpath">
|
||||
<pathelement location="${scala-library.jar}" />
|
||||
<!--<pathelement location="${your.path}" />-->
|
||||
<pathelement location="${build.dir}" />
|
||||
</path>
|
||||
<taskdef name="scalac" classname="scala.tools.ant.Scalac">
|
||||
<classpath>
|
||||
<pathelement location="${scala.home}/lib/scala-compiler.jar" />
|
||||
<pathelement location="${scala-library.jar}" />
|
||||
</classpath>
|
||||
</taskdef>
|
||||
</target>
|
||||
|
||||
<property name="scala.dir" value="scala" />
|
||||
<property name="scala.classes" value="scala/classes" />
|
||||
<target name="scala" depends="init.scala" description="build the scala code">
|
||||
<echo message="Sting: Compiling scala -- omg!"/>
|
||||
<mkdir dir="${scala.classes}"/>
|
||||
|
||||
<scalac srcdir="${scala.dir}" destdir="${scala.classes}" force="changed">
|
||||
<include name="**/*.scala"/>
|
||||
<classpath>
|
||||
<path refid="runtime.dependencies" />
|
||||
<pathelement location="build"/>
|
||||
<pathelement location="lib/junit-4.4.jar"/>
|
||||
<pathelement location="${scala.home}/lib/scala-compiler.jar" />
|
||||
<pathelement location="${scala-library.jar}" />
|
||||
</classpath>
|
||||
</scalac>
|
||||
|
||||
<delete file="${dist}/GATKScala.jar"/>
|
||||
<jar jarfile="${dist}/GATKScala.jar">
|
||||
<fileset dir="${scala.classes}">
|
||||
<include name="**/*.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
||||
<!-- TEST -->
|
||||
<target name="test" depends="test.compile,agent" description="Run unit tests">
|
||||
<mkdir dir="${report}"/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,200 @@
|
|||
package org.broadinstitute.sting.scala
|
||||
|
||||
import gatk.walkers.genotyper.{SingleSampleGenotyper, SSGGenotypeCall}
|
||||
import java.io.File
|
||||
import net.sf.samtools.SAMRecord
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
import utils._
|
||||
import cmdLine.Argument
|
||||
|
||||
class TransitionTable() {
|
||||
var table: Array[Array[Double]] = new Array[Array[Double]](4,4)
|
||||
|
||||
def makeKey(b: Char): Int = {
|
||||
return BaseUtils.simpleBaseToBaseIndex(b)
|
||||
}
|
||||
|
||||
def fromKey(i: Int): Char = {
|
||||
return BaseUtils.baseIndexToSimpleBase(i)
|
||||
}
|
||||
|
||||
def add(ref: Char, subst: Char) = {
|
||||
table(makeKey(ref))(makeKey(subst)) += 1
|
||||
}
|
||||
|
||||
def get(i: Int, j: Int): Double = {
|
||||
return table(i)(j)
|
||||
}
|
||||
|
||||
def set(i: Int, j: Int, d: Double) = {
|
||||
//printf("Setting %d / %d => %f%n", i, j, d)
|
||||
table(i)(j) = d
|
||||
}
|
||||
|
||||
def mapEntries[A](f: ((Int, Int, Double) => A)): List[A] = {
|
||||
return for { i <- List.range(0, 4); j <- List.range(0,4) } yield f(i, j, get(i,j))
|
||||
}
|
||||
|
||||
def header(): String = {
|
||||
return Utils.join("\t", mapEntries((i,j,x) => "P(%c|true=%c)" format (fromKey(j), fromKey(i))).toArray)
|
||||
}
|
||||
|
||||
override def toString(): String = {
|
||||
return Utils.join("\t", mapEntries((i,j,x) => "%.2f" format x).toArray)
|
||||
}
|
||||
|
||||
def sum(): Double = {
|
||||
return sumList(table.toList map (x => sumList(x.toList)))
|
||||
}
|
||||
|
||||
def sumList(xs: List[Double]): Double = {
|
||||
return (0.0 :: xs) reduceLeft {(x, y) => x + y}
|
||||
}
|
||||
|
||||
def getRateMatrix(): TransitionTable = {
|
||||
var sums = (table.toList map (x => sumList(x.toList))).toArray
|
||||
var t = new TransitionTable()
|
||||
|
||||
//println(sums.toList)
|
||||
this mapEntries ((i,j,x) => t.set(i, j, x / sums(i)))
|
||||
|
||||
return t
|
||||
}
|
||||
}
|
||||
|
||||
class BaseTransitionTableCalculator extends LocusWalker[Unit,Int] {
|
||||
private var MIN_MAPPING_QUALITY = 30
|
||||
private var MIN_BASE_QUALITY = 20
|
||||
private var MIN_LOD = 5
|
||||
private var PRINT_FREQ = 1000000
|
||||
private var CARE_ABOUT_STRAND = true
|
||||
|
||||
private val SSG = new SingleSampleGenotyper()
|
||||
private val table1 = new TransitionTable()
|
||||
private val tableFWD = new TransitionTable()
|
||||
private val tableREV = new TransitionTable()
|
||||
private val table2 = new TransitionTable()
|
||||
|
||||
@Argument() { val shortName = "v", val doc = "Print out verbose output", val required = false }
|
||||
var VERBOSE: Boolean = false
|
||||
|
||||
override def initialize() {
|
||||
SSG.initialize();
|
||||
}
|
||||
|
||||
override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Unit = {
|
||||
val call = SSG.map(tracker, ref, context)
|
||||
val pileup = new ReadBackedPileup(ref.getBase(), context)
|
||||
|
||||
def hasNoNs(): Boolean = {
|
||||
return ! (pileup.getBases() exists ('N' ==))
|
||||
}
|
||||
|
||||
if ( isDefinitelyHomRef(call) && hasNoNs() ) {
|
||||
var (refBases, nonRefBases) = splitBases(ref.getBase, pileup.getBases)
|
||||
|
||||
//printf("nonRefBases %s%n", nonRefBases)
|
||||
if ( nonRefBases.length > 0 ) {
|
||||
var (nonRefReads, offsets) = getNonRefReads(ref.getBase, pileup)
|
||||
var nonRefRead: SAMRecord = nonRefReads.head
|
||||
|
||||
nonRefBases.length match {
|
||||
case 1 =>
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table1)
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, if ( nonRefRead.getReadNegativeStrandFlag ) tableREV else tableFWD )
|
||||
case 2 =>
|
||||
addRead(nonRefRead, offsets.head, nonRefBases.head, ref, table2)
|
||||
addRead(nonRefReads.tail.head, offsets.tail.head, nonRefBases.tail.head, ref, table2)
|
||||
case x =>
|
||||
}
|
||||
|
||||
if ( VERBOSE && pileup.size > 30 && nonRefReads.length < 4 )
|
||||
printNonRefBases(ref, nonRefReads, offsets, context.getReads.size())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
implicit def listToJavaList[T](l: List[T]) = java.util.Arrays.asList(l.toArray: _*)
|
||||
|
||||
def printNonRefBases(ref: ReferenceContext, nonRefReads: List[SAMRecord], offsets: List[Integer], depth: Integer): Unit = {
|
||||
out.println(new ReadBackedPileup(ref.getLocus(), ref.getBase(), listToJavaList(nonRefReads), offsets) + " " + depth)
|
||||
}
|
||||
|
||||
def addRead(nonRefRead: SAMRecord, offset: Integer, nonRefBase: Char, ref: ReferenceContext, table: TransitionTable): Unit = {
|
||||
if ( goodRead(nonRefRead, offset) ) {
|
||||
//printf("Including site:%n call=%s%n nonRefBases=%s%n refBases=%s%n nonRefRead=%s%n offset=%s%n", call, nonRefBases, refBases, nonRefRead.format, offsets.head)
|
||||
//println(call.getReadDepth, call.getReferencebase, nonRefBases, refBases, nonRefRead.getMappingQuality)
|
||||
if ( CARE_ABOUT_STRAND && nonRefRead.getReadNegativeStrandFlag() ) {
|
||||
// it's on the reverse strand
|
||||
val refComp: Char = BaseUtils.simpleComplement(ref.getBase)
|
||||
val nonRefComp: Char = BaseUtils.simpleComplement(nonRefBase)
|
||||
|
||||
//printf("Negative: %c/%c is actually %c/%c%n", ref.getBase, nonRefBases.head, refComp, nonRefComp)
|
||||
table.add(refComp, nonRefComp)
|
||||
} else {
|
||||
table.add(ref.getBase, nonRefBase)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
def getNonRefReads(ref: Char, pileup: ReadBackedPileup): (List[SAMRecord], List[Integer]) = {
|
||||
def getBase(i: Int): Char = {
|
||||
var offset = pileup.getOffsets().get(i)
|
||||
return pileup.getReads().get(i).getReadString().charAt(offset.asInstanceOf[Int])
|
||||
}
|
||||
|
||||
var subset = for { i <- List.range(0, pileup.size) if getBase(i) != ref } yield i
|
||||
var reads = subset map (i => pileup.getReads().get(i))
|
||||
var offsets = subset map (i => pileup.getOffsets().get(i))
|
||||
|
||||
//println(reads, offsets, subset)
|
||||
return (reads, offsets)
|
||||
}
|
||||
|
||||
|
||||
def hasFewNonRefBases(refBase: Char, pileup: String): List[Char] = {
|
||||
splitBases(refBase, pileup) match {
|
||||
case (refs, nonRefs) =>
|
||||
//printf("nrf=%s, rb=%s%n", nonRefs.mkString, refs.mkString)
|
||||
return nonRefs
|
||||
}
|
||||
}
|
||||
|
||||
def goodRead( read: SAMRecord, offset: Integer ): Boolean = {
|
||||
return read.getMappingQuality() > MIN_MAPPING_QUALITY && read.getBaseQualities()(offset.intValue) > MIN_BASE_QUALITY
|
||||
}
|
||||
|
||||
def splitBases(refBase: Char, pileup: String): (List[Char], List[Char]) = {
|
||||
val refBases = pileup.toList filter (refBase ==)
|
||||
val nonRefBases = pileup.toList filter (refBase !=)
|
||||
return (refBases, nonRefBases)
|
||||
}
|
||||
|
||||
def isDefinitelyHomRef(call: SSGGenotypeCall): Boolean = {
|
||||
return ! call.isVariation() && call.getBestNext() > MIN_LOD
|
||||
}
|
||||
|
||||
def reduceInit(): Int = {
|
||||
return 0;
|
||||
}
|
||||
|
||||
def reduce(m: Unit, r: Int): Int = {
|
||||
return r;
|
||||
}
|
||||
|
||||
override def onTraversalDone(r: Int) = {
|
||||
out.println("-------------------- FINAL RESULT --------------------")
|
||||
out.println("type\t" + table1.header)
|
||||
def print1(t: String, x: TransitionTable) = {
|
||||
out.println(t + "\t" + x)
|
||||
}
|
||||
|
||||
print1("1-mismatch", table1)
|
||||
print1("2-mismatch", table2)
|
||||
print1("1-mismatch-fwd", tableFWD)
|
||||
print1("1-mismatch-rev", tableREV)
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
object HelloWorld {
|
||||
def main(args: Array[String]) {
|
||||
println("Hello, world!")
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,25 @@
|
|||
package org.broadinstitute.sting.scala
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
|
||||
class PrintReadsScala extends LocusWalker[Int,Int] {
|
||||
override def map(tracker: RefMetaDataTracker, ref: ReferenceContext, context: AlignmentContext): Int = {
|
||||
//println(context.getReads().size())
|
||||
return 1
|
||||
}
|
||||
|
||||
def reduceInit(): Int = {
|
||||
return 0;
|
||||
}
|
||||
|
||||
def reduce(m: Int, r: Int): Int = {
|
||||
return m + r;
|
||||
}
|
||||
|
||||
def main(args: Array[String]) {
|
||||
println("Hello, world!")
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue