Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
e5ce5e265a
299
build.xml
299
build.xml
|
|
@ -163,6 +163,14 @@
|
||||||
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
|
||||||
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
|
||||||
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
|
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
|
||||||
|
<!--
|
||||||
|
Old versions of ivy-1.4.1.xml do not contain /ivy-module/configuration/conf/@name="compile".
|
||||||
|
Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
|
||||||
|
Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
|
||||||
|
-->
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
|
||||||
|
<delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="init.buildall">
|
<target name="init.buildall">
|
||||||
|
|
@ -709,53 +717,6 @@
|
||||||
</antcall>
|
</antcall>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="test.init.compile">
|
|
||||||
<mkdir dir="${java.test.classes}"/>
|
|
||||||
<mkdir dir="${scala.test.classes}"/>
|
|
||||||
<antcall target="resolve">
|
|
||||||
<param name="ivy.conf" value="test"/>
|
|
||||||
</antcall>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
|
|
||||||
<echo message="Sting: Compiling test cases!"/>
|
|
||||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
|
||||||
<src path="${java.public.test.sources}"/>
|
|
||||||
<src path="${java.private.test.sources}"/>
|
|
||||||
<classpath>
|
|
||||||
<path refid="external.dependencies" />
|
|
||||||
<pathelement location="${java.classes}"/>
|
|
||||||
<pathelement location="${java.contracts}"/>
|
|
||||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
</classpath>
|
|
||||||
<compilerarg value="-proc:none"/>
|
|
||||||
<!--
|
|
||||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
|
||||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
|
||||||
-->
|
|
||||||
</javac>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
|
||||||
<echo message="Scala: Compiling test cases!"/>
|
|
||||||
<antcall target="resolve">
|
|
||||||
<param name="ivy.conf" value="test"/>
|
|
||||||
</antcall>
|
|
||||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
|
||||||
<src path="${scala.public.test.sources}" />
|
|
||||||
<src path="${scala.private.test.sources}" />
|
|
||||||
<include name="**/*.scala"/>
|
|
||||||
<classpath>
|
|
||||||
<path refid="scala.dependencies"/>
|
|
||||||
<pathelement location="${scala.test.classes}"/>
|
|
||||||
<pathelement location="${java.test.classes}"/>
|
|
||||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
</classpath>
|
|
||||||
</scalac>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
|
|
||||||
|
|
||||||
<!-- new scala target -->
|
<!-- new scala target -->
|
||||||
|
|
||||||
<target name="scala" description="build the scala directory">
|
<target name="scala" description="build the scala directory">
|
||||||
|
|
@ -769,20 +730,113 @@
|
||||||
<!-- ***************************************************************************** -->
|
<!-- ***************************************************************************** -->
|
||||||
<!-- where to put reports and tests-->
|
<!-- where to put reports and tests-->
|
||||||
<property name="report" value="${build.dir}/report"/>
|
<property name="report" value="${build.dir}/report"/>
|
||||||
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
|
||||||
<property name="test.output" value="${dist.dir}/test"/>
|
<property name="test.output" value="${dist.dir}/test"/>
|
||||||
<property name="java.public.test.sources" value="public/java/test"/>
|
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
||||||
<property name="java.private.test.sources" value="private/java/test"/>
|
<property name="java.public.test.classes" value="${java.test.classes}/public"/>
|
||||||
|
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
|
||||||
|
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
|
||||||
|
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
|
||||||
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
||||||
<property name="scala.public.test.sources" value="public/scala/test"/>
|
<property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
|
||||||
<property name="scala.private.test.sources" value="private/scala/test"/>
|
<property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
|
||||||
|
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
|
||||||
|
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
|
||||||
|
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
|
||||||
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
||||||
<property name="test.maxmemory" value="4g"/>
|
<property name="test.maxmemory" value="4g"/>
|
||||||
|
|
||||||
|
<target name="test.init.compile">
|
||||||
|
<mkdir dir="${java.test.classes}"/>
|
||||||
|
<mkdir dir="${scala.test.classes}"/>
|
||||||
|
<antcall target="resolve">
|
||||||
|
<param name="ivy.conf" value="test"/>
|
||||||
|
</antcall>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.public.compile" depends="dist,test.init.compile">
|
||||||
|
<mkdir dir="${java.public.test.classes}"/>
|
||||||
|
<echo message="Sting: Compiling public test cases!"/>
|
||||||
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
|
<src path="${java.public.test.sources}"/>
|
||||||
|
<classpath>
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.classes}"/>
|
||||||
|
<pathelement location="${java.contracts}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
<compilerarg value="-proc:none"/>
|
||||||
|
<!--
|
||||||
|
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||||
|
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||||
|
-->
|
||||||
|
</javac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
|
||||||
|
<mkdir dir="${java.private.test.classes}"/>
|
||||||
|
<echo message="Sting: Compiling private test cases!"/>
|
||||||
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
|
<src path="${java.private.test.sources}"/>
|
||||||
|
<classpath>
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.public.test.classes}"/>
|
||||||
|
<pathelement location="${java.classes}"/>
|
||||||
|
<pathelement location="${java.contracts}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
<compilerarg value="-proc:none"/>
|
||||||
|
<!--
|
||||||
|
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||||
|
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||||
|
-->
|
||||||
|
</javac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
|
||||||
|
|
||||||
|
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
||||||
|
<mkdir dir="${scala.public.test.classes}"/>
|
||||||
|
<echo message="Scala: Compiling public test cases!"/>
|
||||||
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
|
||||||
|
<src path="${scala.public.test.sources}" />
|
||||||
|
<classpath>
|
||||||
|
<path refid="scala.dependencies"/>
|
||||||
|
<pathelement location="${java.public.test.classes}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
</scalac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
|
||||||
|
<mkdir dir="${scala.private.test.classes}"/>
|
||||||
|
<echo message="Scala: Compiling private test cases!"/>
|
||||||
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
|
||||||
|
<src path="${scala.private.test.sources}" />
|
||||||
|
<classpath>
|
||||||
|
<path refid="scala.dependencies"/>
|
||||||
|
<pathelement location="${scala.public.test.classes}"/>
|
||||||
|
<pathelement location="${java.public.test.classes}"/>
|
||||||
|
<pathelement location="${java.private.test.classes}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
</scalac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
|
||||||
|
|
||||||
|
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
|
||||||
|
|
||||||
<!-- TEST -->
|
<!-- TEST -->
|
||||||
<macrodef name="run-test">
|
<macrodef name="run-test">
|
||||||
<attribute name="testtype"/>
|
<attribute name="testtype"/>
|
||||||
|
<attribute name="outputdir"/>
|
||||||
|
<attribute name="runfailed"/>
|
||||||
|
|
||||||
<sequential>
|
<sequential>
|
||||||
|
<condition property="run.failed.tests">
|
||||||
|
<equals arg1="@{runfailed}" arg2="true"/>
|
||||||
|
</condition>
|
||||||
|
|
||||||
<!-- Get the pipeline run type. Default to dry. -->
|
<!-- Get the pipeline run type. Default to dry. -->
|
||||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||||
|
|
@ -792,10 +846,10 @@
|
||||||
<isset property="include.contracts" />
|
<isset property="include.contracts" />
|
||||||
</condition>
|
</condition>
|
||||||
|
|
||||||
<mkdir dir="${report}/@{testtype}"/>
|
<mkdir dir="@{outputdir}"/>
|
||||||
<echo message="Sting: Running @{testtype} test cases!"/>
|
<echo message="Sting: Running @{testtype} test cases!"/>
|
||||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
|
||||||
<testng outputDir="${report}/@{testtype}"
|
<testng outputDir="@{outputdir}"
|
||||||
haltOnFailure="false" failureProperty="test.failure"
|
haltOnFailure="false" failureProperty="test.failure"
|
||||||
verbose="2"
|
verbose="2"
|
||||||
workingDir="${basedir}"
|
workingDir="${basedir}"
|
||||||
|
|
@ -813,117 +867,108 @@
|
||||||
<pathelement location="${java.classes}" />
|
<pathelement location="${java.classes}" />
|
||||||
<pathelement location="${scala.classes}" />
|
<pathelement location="${scala.classes}" />
|
||||||
<pathelement location="${java.contracts}" />
|
<pathelement location="${java.contracts}" />
|
||||||
<pathelement location="${java.test.classes}" />
|
<pathelement location="${java.public.test.classes}" />
|
||||||
<pathelement location="${scala.test.classes}" />
|
<pathelement location="${java.private.test.classes}" />
|
||||||
|
<pathelement location="${scala.public.test.classes}" />
|
||||||
|
<pathelement location="${scala.private.test.classes}" />
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
||||||
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
|
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
|
||||||
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
|
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}.class" if="include.private"/>
|
||||||
|
</classfileset>
|
||||||
|
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}*.class" if="scala.include"/>
|
||||||
|
</classfileset>
|
||||||
|
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}*.class" if="include.scala.private"/>
|
||||||
|
</classfileset>
|
||||||
|
|
||||||
|
<xmlfileset dir="${basedir}">
|
||||||
|
<include name="@{testtype}" if="run.failed.tests"/>
|
||||||
|
</xmlfileset>
|
||||||
</testng>
|
</testng>
|
||||||
|
|
||||||
<!-- generate a report for Bamboo or Hudson to read in -->
|
<!-- generate a report for Bamboo or Hudson to read in -->
|
||||||
<junitreport todir="${report}/@{testtype}">
|
<junitreport todir="@{outputdir}">
|
||||||
<fileset dir="${report}/@{testtype}">
|
<fileset dir="@{outputdir}">
|
||||||
<include name="*/*.xml"/>
|
<include name="*/*.xml"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
<report format="noframes" todir="${report}/@{testtype}"/>
|
<report format="noframes" todir="@{outputdir}"/>
|
||||||
</junitreport>
|
</junitreport>
|
||||||
<fail message="test failed" if="test.failure" />
|
|
||||||
</sequential>
|
|
||||||
</macrodef>
|
|
||||||
|
|
||||||
<!-- FAILED-TEST -->
|
|
||||||
<macrodef name="run-failed-test">
|
|
||||||
<attribute name="xmlfailedtestfile" />
|
|
||||||
<sequential>
|
|
||||||
<!-- Get the pipeline run type. Default to dry. -->
|
|
||||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
|
||||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
|
||||||
</condition>
|
|
||||||
|
|
||||||
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
|
|
||||||
<isset property="include.contracts" />
|
|
||||||
</condition>
|
|
||||||
|
|
||||||
<mkdir dir="${report}/failed_rerun" />
|
|
||||||
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
|
|
||||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
<testng outputDir="${report}/failed_rerun"
|
|
||||||
haltOnFailure="false" failureProperty="test.failure"
|
|
||||||
verbose="2"
|
|
||||||
workingDir="${basedir}"
|
|
||||||
useDefaultListeners="false"
|
|
||||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
|
|
||||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
|
||||||
<jvmarg value="-Djava.awt.headless=true" />
|
|
||||||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
|
||||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
|
||||||
<jvmarg line="${cofoja.jvm.args}"/>
|
|
||||||
<!-- <jvmarg value="-Xdebug"/> -->
|
|
||||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
|
||||||
<classpath>
|
|
||||||
<path refid="external.dependencies" />
|
|
||||||
<pathelement location="${java.classes}" />
|
|
||||||
<pathelement location="${scala.classes}" />
|
|
||||||
<pathelement location="${java.contracts}" />
|
|
||||||
<pathelement location="${java.test.classes}" />
|
|
||||||
<pathelement location="${scala.test.classes}" />
|
|
||||||
</classpath>
|
|
||||||
|
|
||||||
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
|
|
||||||
</testng>
|
|
||||||
|
|
||||||
<fail message="test failed" if="test.failure" />
|
<fail message="test failed" if="test.failure" />
|
||||||
</sequential>
|
</sequential>
|
||||||
</macrodef>
|
</macrodef>
|
||||||
|
|
||||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
<target name="alltests">
|
||||||
<target name="test" depends="test.compile" description="Run unit tests">
|
<antcall target="test" inheritAll="false"/>
|
||||||
|
<antcall target="integrationtest" inheritAll="false"/>
|
||||||
|
<antcall target="pipelinetest" inheritAll="false"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="alltests.public">
|
||||||
|
<antcall target="test.public" inheritAll="false"/>
|
||||||
|
<antcall target="integrationtest.public" inheritAll="false"/>
|
||||||
|
<antcall target="pipelinetest.public" inheritAll="false"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
|
||||||
|
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
|
||||||
<condition property="ttype" value="*UnitTest" else="${single}">
|
<condition property="ttype" value="*UnitTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${ttype}"/>
|
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="integrationtest" depends="test.compile" description="Run integration tests">
|
<target name="test.public" depends="init.buildpublic,test"/>
|
||||||
|
|
||||||
|
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
|
||||||
<condition property="itype" value="*IntegrationTest" else="${single}">
|
<condition property="itype" value="*IntegrationTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${itype}"/>
|
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="performancetest" depends="test.compile" description="Run performance tests">
|
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
|
||||||
|
|
||||||
|
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
|
||||||
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${ptype}"/>
|
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
|
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
|
||||||
|
|
||||||
|
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${pipetype}"/>
|
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
|
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
|
||||||
|
|
||||||
|
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||||
<property name="pipeline.run" value="run"/>
|
<property name="pipeline.run" value="run"/>
|
||||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${pipetype}"/>
|
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||||
|
</target>
|
||||||
|
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
|
||||||
|
|
||||||
|
<target name="failed-test" depends="init.buildall,test.compile">
|
||||||
|
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-test" depends="test.compile">
|
<target name="failed-integration" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
|
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-integration" depends="test.compile">
|
<target name="failed-performance" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
|
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-performance" depends="test.compile">
|
<target name="failed-pipeline" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
|
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="failed-pipeline" depends="test.compile">
|
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
|
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<!-- ******************************************************************************** -->
|
<!-- ******************************************************************************** -->
|
||||||
|
|
|
||||||
6
ivy.xml
6
ivy.xml
|
|
@ -15,10 +15,8 @@
|
||||||
<!-- Tribble -->
|
<!-- Tribble -->
|
||||||
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||||
|
|
||||||
<dependency org="log4j" name="log4j" rev="1.2.15">
|
<dependency org="log4j" name="log4j" rev="1.2.15"/>
|
||||||
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
|
||||||
<exclude org="javax.mail" />
|
|
||||||
</dependency>
|
|
||||||
<dependency org="colt" name="colt" rev="1.2.0"/>
|
<dependency org="colt" name="colt" rev="1.2.0"/>
|
||||||
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
|
||||||
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
|
||||||
|
|
|
||||||
|
|
@ -12,14 +12,14 @@ if ( onCMDLine ) {
|
||||||
inputFileName = args[1]
|
inputFileName = args[1]
|
||||||
outputPDF = args[2]
|
outputPDF = args[2]
|
||||||
} else {
|
} else {
|
||||||
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
inputFileName = "~/Desktop/Q-30033@gsa1.jobreport.txt"
|
||||||
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||||
outputPDF = NA
|
outputPDF = NA
|
||||||
}
|
}
|
||||||
|
|
||||||
RUNTIME_UNITS = "(sec)"
|
RUNTIME_UNITS = "(hours)"
|
||||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
ORIGINAL_UNITS_TO_SECONDS = 1/1000/60/60
|
||||||
|
|
||||||
#
|
#
|
||||||
# Helper function to aggregate all of the jobs in the report across all tables
|
# Helper function to aggregate all of the jobs in the report across all tables
|
||||||
|
|
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
|
||||||
#
|
#
|
||||||
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||||
#
|
#
|
||||||
plotJobsGantt <- function(gatkReport, sortOverall) {
|
plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
|
||||||
allJobs = allJobsFromReport(gatkReport)
|
allJobs = allJobsFromReport(gatkReport)
|
||||||
if ( sortOverall ) {
|
if ( sortOverall ) {
|
||||||
title = "All jobs, by analysis, by start time"
|
title = "All jobs, by analysis, by start time"
|
||||||
|
|
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
|
||||||
}
|
}
|
||||||
allJobs$index = 1:nrow(allJobs)
|
allJobs$index = 1:nrow(allJobs)
|
||||||
minTime = min(allJobs$startTime)
|
minTime = min(allJobs$startTime)
|
||||||
allJobs$relStartTime = allJobs$startTime - minTime
|
allJobs$relStartTime = (allJobs$startTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
|
||||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
allJobs$relDoneTime = (allJobs$doneTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
|
||||||
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
||||||
maxRelTime = max(allJobs$relDoneTime)
|
maxRelTime = max(allJobs$relDoneTime)
|
||||||
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
||||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
|
p <- p + theme_bw()
|
||||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
|
||||||
|
if ( includeText )
|
||||||
|
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||||
p <- p + xlim(0, maxRelTime * 1.1)
|
p <- p + xlim(0, maxRelTime * 1.1)
|
||||||
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
||||||
p <- p + ylab("Job")
|
p <- p + ylab("Job number")
|
||||||
p <- p + opts(title=title)
|
p <- p + opts(title=title)
|
||||||
print(p)
|
print(p)
|
||||||
}
|
}
|
||||||
|
|
@ -140,6 +142,8 @@ print(paste("Project :", inputFileName))
|
||||||
convertUnits <- function(gatkReportData) {
|
convertUnits <- function(gatkReportData) {
|
||||||
convertGroup <- function(g) {
|
convertGroup <- function(g) {
|
||||||
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
|
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
|
||||||
|
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
|
||||||
|
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
|
||||||
g
|
g
|
||||||
}
|
}
|
||||||
lapply(gatkReportData, convertGroup)
|
lapply(gatkReportData, convertGroup)
|
||||||
|
|
@ -155,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
|
||||||
pdf(outputPDF, height=8.5, width=11)
|
pdf(outputPDF, height=8.5, width=11)
|
||||||
}
|
}
|
||||||
|
|
||||||
plotJobsGantt(gatkReportData, T)
|
plotJobsGantt(gatkReportData, T, F)
|
||||||
plotJobsGantt(gatkReportData, F)
|
plotJobsGantt(gatkReportData, F, F)
|
||||||
plotProgressByTime(gatkReportData)
|
plotProgressByTime(gatkReportData)
|
||||||
for ( group in gatkReportData ) {
|
for ( group in gatkReportData ) {
|
||||||
plotGroup(group)
|
plotGroup(group)
|
||||||
|
|
|
||||||
|
|
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
private String RECAL_FILE = "output.recal_data.csv";
|
private String RECAL_FILE = "output.recal_data.csv";
|
||||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||||
private String OUTPUT_DIR = "analyzeCovariates/";
|
private String OUTPUT_DIR = "analyzeCovariates/";
|
||||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
|
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||||
private String PATH_TO_RSCRIPT = "Rscript";
|
private String PATH_TO_RSCRIPT = "Rscript";
|
||||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||||
private String PATH_TO_RESOURCES = "public/R/";
|
private String PATH_TO_RESOURCES = "public/R/";
|
||||||
|
|
|
||||||
|
|
@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tribbleType == null )
|
if ( tribbleType == null )
|
||||||
if ( ! file.canRead() | !! file.isFile() ) {
|
if ( ! file.canRead() | ! file.isFile() ) {
|
||||||
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
||||||
} else {
|
} else {
|
||||||
throw new UserException.CommandLineException(
|
throw new UserException.CommandLineException(
|
||||||
|
|
|
||||||
|
|
@ -929,6 +929,14 @@ public class GenomeAnalysisEngine {
|
||||||
return readsDataSource.getHeader(reader);
|
return readsDataSource.getHeader(reader);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary for this GATK engine instance
|
||||||
|
* @return a never-null dictionary listing all of the contigs known to this engine instance
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return getReferenceDataSource().getReference().getSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns data source object encapsulating all essential info and handlers used to traverse
|
* Returns data source object encapsulating all essential info and handlers used to traverse
|
||||||
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.picard.util.PeekableIterator;
|
import net.sf.picard.util.PeekableIterator;
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
import net.sf.samtools.GATKBAMFileSpan;
|
||||||
|
import net.sf.samtools.GATKChunk;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||||
|
|
||||||
|
|
@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
|
||||||
if(currentLocus == GenomeLoc.UNMAPPED) {
|
if(currentLocus == GenomeLoc.UNMAPPED) {
|
||||||
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
|
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
|
||||||
for(SAMReaderID id: dataSource.getReaderIDs())
|
for(SAMReaderID id: dataSource.getReaderIDs())
|
||||||
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan());
|
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
|
||||||
currentLocus = null;
|
currentLocus = null;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -215,6 +215,45 @@ public class GATKBAMIndex {
|
||||||
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Use to get close to the unmapped reads at the end of a BAM file.
|
||||||
|
* @return The file offset of the first record in the last linear bin, or -1
|
||||||
|
* if there are no elements in linear bins (i.e. no mapped reads).
|
||||||
|
*/
|
||||||
|
public long getStartOfLastLinearBin() {
|
||||||
|
openIndexFile();
|
||||||
|
|
||||||
|
seek(4);
|
||||||
|
|
||||||
|
final int sequenceCount = readInteger();
|
||||||
|
// Because no reads may align to the last sequence in the sequence dictionary,
|
||||||
|
// grab the last element of the linear index for each sequence, and return
|
||||||
|
// the last one from the last sequence that has one.
|
||||||
|
long lastLinearIndexPointer = -1;
|
||||||
|
for (int i = 0; i < sequenceCount; i++) {
|
||||||
|
// System.out.println("# Sequence TID: " + i);
|
||||||
|
final int nBins = readInteger();
|
||||||
|
// System.out.println("# nBins: " + nBins);
|
||||||
|
for (int j1 = 0; j1 < nBins; j1++) {
|
||||||
|
// Skip bin #
|
||||||
|
skipBytes(4);
|
||||||
|
final int nChunks = readInteger();
|
||||||
|
// Skip chunks
|
||||||
|
skipBytes(16 * nChunks);
|
||||||
|
}
|
||||||
|
final int nLinearBins = readInteger();
|
||||||
|
if (nLinearBins > 0) {
|
||||||
|
// Skip to last element of list of linear bins
|
||||||
|
skipBytes(8 * (nLinearBins - 1));
|
||||||
|
lastLinearIndexPointer = readLongs(1)[0];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
closeIndexFile();
|
||||||
|
|
||||||
|
return lastLinearIndexPointer;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets the possible number of bins for a given reference sequence.
|
* Gets the possible number of bins for a given reference sequence.
|
||||||
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.
|
||||||
|
|
|
||||||
|
|
@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
||||||
*/
|
*/
|
||||||
public FilePointer next() {
|
public FilePointer next() {
|
||||||
FilePointer current = wrappedIterator.next();
|
FilePointer current = wrappedIterator.next();
|
||||||
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
|
||||||
current = current.combine(parser,wrappedIterator.next());
|
current = current.combine(parser,wrappedIterator.next());
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -134,24 +134,11 @@ public class ReadShardStrategy implements ShardStrategy {
|
||||||
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
|
||||||
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
while(selectedReaders.size() == 0 && currentFilePointer != null) {
|
||||||
shardPosition = currentFilePointer.fileSpans;
|
shardPosition = currentFilePointer.fileSpans;
|
||||||
|
|
||||||
for(SAMReaderID id: shardPosition.keySet()) {
|
for(SAMReaderID id: shardPosition.keySet()) {
|
||||||
// If the region contains location information (in other words, it is not at
|
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
|
||||||
// the start of the unmapped region), add the region.
|
if(!fileSpan.isEmpty())
|
||||||
if(currentFilePointer.isRegionUnmapped) {
|
selectedReaders.put(id,fileSpan);
|
||||||
// If the region is unmapped and no location data exists, add a null as an indicator to
|
|
||||||
// start at the next unmapped region.
|
|
||||||
if(!isIntoUnmappedRegion) {
|
|
||||||
selectedReaders.put(id,null);
|
|
||||||
isIntoUnmappedRegion = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
selectedReaders.put(id,position.get(id));
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
|
|
||||||
if(!fileSpan.isEmpty())
|
|
||||||
selectedReaders.put(id,fileSpan);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(selectedReaders.size() > 0) {
|
if(selectedReaders.size() > 0) {
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
|
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
* @author Your Name
|
* @author Your Name
|
||||||
* @since Date created
|
* @since Date created
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
||||||
/**
|
/**
|
||||||
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class PlatformFilter extends ReadFilter {
|
public class PlatformFilter extends ReadFilter {
|
||||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
|
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
|
||||||
protected String[] PLFilterNames;
|
protected String[] PLFilterNames;
|
||||||
|
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
|
|
|
||||||
|
|
@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
else if ( stub.getOutputStream() != null ) {
|
else if ( stub.getOutputStream() != null ) {
|
||||||
this.file = null;
|
this.file = null;
|
||||||
this.stream = stub.getOutputStream();
|
this.stream = stub.getOutputStream();
|
||||||
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
|
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||||
|
|
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
||||||
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.io.stubs;
|
package org.broadinstitute.sting.gatk.io.stubs;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||||
return isCompressed;
|
return isCompressed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary from the engine associated with this stub
|
||||||
|
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return engine.getMasterSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Should we tell the VCF writer not to write genotypes?
|
* Should we tell the VCF writer not to write genotypes?
|
||||||
* @return true if the writer should not write genotypes.
|
* @return true if the writer should not write genotypes.
|
||||||
|
|
|
||||||
|
|
@ -293,15 +293,16 @@ public class GATKRunReport {
|
||||||
* That is, postReport() is guarenteed not to fail for any reason.
|
* That is, postReport() is guarenteed not to fail for any reason.
|
||||||
*/
|
*/
|
||||||
private File postReportToLocalDisk(File rootDir) {
|
private File postReportToLocalDisk(File rootDir) {
|
||||||
|
String filename = getID() + ".report.xml.gz";
|
||||||
|
File file = new File(rootDir, filename);
|
||||||
try {
|
try {
|
||||||
String filename = getID() + ".report.xml.gz";
|
|
||||||
File file = new File(rootDir, filename);
|
|
||||||
postReportToFile(file);
|
postReportToFile(file);
|
||||||
logger.debug("Wrote report to " + file);
|
logger.debug("Wrote report to " + file);
|
||||||
return file;
|
return file;
|
||||||
} catch ( Exception e ) {
|
} catch ( Exception e ) {
|
||||||
// we catch everything, and no matter what eat the error
|
// we catch everything, and no matter what eat the error
|
||||||
exceptDuringRunReport("Couldn't read report file", e);
|
exceptDuringRunReport("Couldn't read report file", e);
|
||||||
|
file.delete();
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -312,6 +313,7 @@ public class GATKRunReport {
|
||||||
File localFile = postReportToLocalDisk(new File("./"));
|
File localFile = postReportToLocalDisk(new File("./"));
|
||||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||||
if ( localFile != null ) { // we succeeded in creating the local file
|
if ( localFile != null ) { // we succeeded in creating the local file
|
||||||
|
localFile.deleteOnExit();
|
||||||
try {
|
try {
|
||||||
// stop us from printing the annoying, and meaningless, mime types warning
|
// stop us from printing the annoying, and meaningless, mime types warning
|
||||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||||
|
|
@ -336,14 +338,13 @@ public class GATKRunReport {
|
||||||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||||
logger.debug("Uploaded to AWS: " + s3Object);
|
logger.debug("Uploaded to AWS: " + s3Object);
|
||||||
|
logger.info("Uploaded run statistics report to AWS S3");
|
||||||
} catch ( S3ServiceException e ) {
|
} catch ( S3ServiceException e ) {
|
||||||
exceptDuringRunReport("S3 exception occurred", e);
|
exceptDuringRunReport("S3 exception occurred", e);
|
||||||
} catch ( NoSuchAlgorithmException e ) {
|
} catch ( NoSuchAlgorithmException e ) {
|
||||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
exceptDuringRunReport("Couldn't read report file", e);
|
exceptDuringRunReport("Couldn't read report file", e);
|
||||||
} finally {
|
|
||||||
localFile.delete();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram {
|
||||||
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
||||||
|
|
||||||
// add writing of the sequence dictionary, if supplied
|
// add writing of the sequence dictionary, if supplied
|
||||||
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
|
builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
|
||||||
|
|
||||||
// create the output stream, and write the index
|
// create the output stream, and write the index
|
||||||
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.index.Index;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
|
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||||
|
|
||||||
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utilities for working with Sequence Dictionaries embedded in tribble indices
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class IndexDictionaryUtils {
|
||||||
|
private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);
|
||||||
|
|
||||||
|
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||||
|
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
||||||
|
* @param index the index file to use
|
||||||
|
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||||
|
*/
|
||||||
|
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
||||||
|
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||||
|
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
||||||
|
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
||||||
|
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
||||||
|
Integer.valueOf(entry.getValue())));
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* create the sequence dictionary with the contig list; a backup approach
|
||||||
|
* @param index the index file to use
|
||||||
|
* @param dict the sequence dictionary to add contigs to
|
||||||
|
* @return the filled-in sequence dictionary
|
||||||
|
*/
|
||||||
|
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
||||||
|
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
||||||
|
if (seqNames == null) {
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
for (String name : seqNames) {
|
||||||
|
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
||||||
|
dict.addSequence(seq);
|
||||||
|
}
|
||||||
|
return dict;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
|
||||||
|
for ( SAMSequenceRecord seq : dict.getSequences() ) {
|
||||||
|
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
|
||||||
|
final String length = String.valueOf(seq.getSequenceLength());
|
||||||
|
index.addProperty(contig,length);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void validateTrackSequenceDictionary(final String trackName,
|
||||||
|
final SAMSequenceDictionary trackDict,
|
||||||
|
final SAMSequenceDictionary referenceDict,
|
||||||
|
final ValidationExclusion.TYPE validationExclusionType ) {
|
||||||
|
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||||
|
if (trackDict == null || trackDict.size() == 0)
|
||||||
|
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||||
|
else {
|
||||||
|
Set<String> trackSequences = new TreeSet<String>();
|
||||||
|
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||||
|
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||||
|
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,7 +25,6 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
|
|
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -52,11 +50,6 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.LinkedHashSet;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.TreeSet;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||||
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
||||||
|
|
||||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
|
||||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
|
||||||
|
|
||||||
// private sequence dictionary we use to set our tracks with
|
// private sequence dictionary we use to set our tracks with
|
||||||
private SAMSequenceDictionary dict = null;
|
private SAMSequenceDictionary dict = null;
|
||||||
|
|
||||||
|
|
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||||
catch (ReviewedStingException e) { }
|
catch (ReviewedStingException e) { }
|
||||||
|
|
||||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||||
|
|
||||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||||
if (sequenceDictionary.size() == 0 && dict != null) {
|
if (sequenceDictionary.size() == 0 && dict != null) {
|
||||||
File indexFile = Tribble.indexFile(inputFile);
|
File indexFile = Tribble.indexFile(inputFile);
|
||||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
|
||||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
try { // re-write the index
|
||||||
|
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
||||||
|
} catch (IOException e) {
|
||||||
|
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||||
|
}
|
||||||
|
|
||||||
|
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||||
|
|
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
// this can take a while, let them know what we're doing
|
// this can take a while, let them know what we're doing
|
||||||
logger.info("Creating Tribble index in memory for file " + inputFile);
|
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||||
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||||
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
|
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
|
||||||
// static functions to work with the sequence dictionaries of indexes
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
|
||||||
* @param index the index file to use
|
|
||||||
* @return a SAMSequenceDictionary if available, null if unavailable
|
|
||||||
*/
|
|
||||||
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
|
||||||
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
|
||||||
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
|
||||||
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
|
||||||
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
|
||||||
Integer.valueOf(entry.getValue())));
|
|
||||||
}
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* create the sequence dictionary with the contig list; a backup approach
|
|
||||||
* @param index the index file to use
|
|
||||||
* @param dict the sequence dictionary to add contigs to
|
|
||||||
* @return the filled-in sequence dictionary
|
|
||||||
*/
|
|
||||||
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
|
||||||
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
|
||||||
if (seqNames == null) {
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
for (String name : seqNames) {
|
|
||||||
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
|
||||||
dict.addSequence(seq);
|
|
||||||
}
|
|
||||||
return dict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
||||||
* (that each contig in the index is in the sequence dictionary).
|
* (that each contig in the index is in the sequence dictionary).
|
||||||
* @param inputFile for proper error message formatting.
|
* @param inputFile for proper error message formatting.
|
||||||
* @param dict the sequence dictionary
|
* @param dict the sequence dictionary
|
||||||
* @param index the index file
|
* @param index the index file
|
||||||
* @param indexFile the index file
|
|
||||||
* @param rewriteIndex should we rewrite the index when we're done?
|
|
||||||
*
|
|
||||||
*/
|
*/
|
||||||
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
|
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
|
||||||
if (dict == null) return;
|
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
|
||||||
|
|
||||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
|
||||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
|
||||||
|
|
||||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||||
if (dict.getSequence(seq.getSequenceName()) == null)
|
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
|
||||||
continue;
|
|
||||||
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
|
// actually update the dictionary in the index
|
||||||
}
|
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
|
||||||
// re-write the index
|
|
||||||
if (rewriteIndex) try {
|
|
||||||
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
|
||||||
} catch (IOException e) {
|
|
||||||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void validateTrackSequenceDictionary(final String trackName,
|
||||||
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
final SAMSequenceDictionary trackDict,
|
||||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
final SAMSequenceDictionary referenceDict ) {
|
||||||
if (trackDict == null || trackDict.size() == 0)
|
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
|
||||||
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
|
||||||
else {
|
|
||||||
Set<String> trackSequences = new TreeSet<String>();
|
|
||||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
|
||||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
|
||||||
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,12 +24,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.report;
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tracks a linked list of GATKReportColumn in order by name.
|
* Tracks a linked list of GATKReportColumn in order by name.
|
||||||
*/
|
*/
|
||||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
|
||||||
private List<String> columnNames = new ArrayList<String>();
|
private List<String> columnNames = new ArrayList<String>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||||
columnNames.add(key);
|
columnNames.add(key);
|
||||||
return super.put(key, value);
|
return super.put(key, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Iterator<GATKReportColumn> iterator() {
|
||||||
|
return new Iterator<GATKReportColumn>() {
|
||||||
|
int offset = 0;
|
||||||
|
public boolean hasNext() { return offset < columnNames.size() ; }
|
||||||
|
public GATKReportColumn next() { return getByIndex(offset++); }
|
||||||
|
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
|
||||||
|
};
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -286,6 +286,10 @@ public class GATKReportTable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean containsKey(Object primaryKey) {
|
||||||
|
return primaryKeyColumn.contains(primaryKey);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the value for a given position in the table
|
* Set the value for a given position in the table
|
||||||
*
|
*
|
||||||
|
|
|
||||||
|
|
@ -358,7 +358,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
public void printOnTraversalDone() {
|
public void printOnTraversalDone() {
|
||||||
printProgress(null, null, true);
|
printProgress(null, null, true);
|
||||||
|
|
||||||
final double elapsed = timer.getElapsedTime();
|
final double elapsed = timer == null ? 0 : timer.getElapsedTime();
|
||||||
|
|
||||||
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
|
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,21 +26,23 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
||||||
* of the given ROD.
|
* of the given ROD.
|
||||||
*/
|
*/
|
||||||
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
||||||
|
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
|
||||||
|
public RodBinding<Feature> input;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
|
|
@ -62,7 +64,7 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
for ( Feature feature : tracker.getValues(Feature.class) ) {
|
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
|
||||||
out.println(feature.toString());
|
out.println(feature.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
* -I input1.bam \
|
* -I input1.bam \
|
||||||
* -I input2.bam \
|
* -I input2.bam \
|
||||||
* --read_filter MappingQualityZero
|
* --read_filter MappingQualityZero
|
||||||
|
*
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T PrintReads \
|
||||||
|
* -o output.bam \
|
||||||
|
* -I input.bam \
|
||||||
|
* -n 2000
|
||||||
* </pre>
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
|
@ -77,6 +78,15 @@ public abstract class Walker<MapType, ReduceType> {
|
||||||
return toolkit;
|
return toolkit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets the master sequence dictionary for this walker
|
||||||
|
* @see GenomeAnalysisEngine#getMasterSequenceDictionary()
|
||||||
|
* @return the master sequence dictionary reported by the toolkit
|
||||||
|
*/
|
||||||
|
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||||
|
return getToolkit().getMasterSequenceDictionary();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
||||||
* that contain a deletion at the locus.
|
* that contain a deletion at the locus.
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
|
||||||
|
*/
|
||||||
public class AlleleBalance extends InfoFieldAnnotation {
|
public class AlleleBalance extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The allele balance (fraction of ref bases over ref + alt bases) separately for each biallelic het-called sample
|
||||||
|
*/
|
||||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract base class for all annotations that are normalized by depth
|
||||||
|
*/
|
||||||
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,6 +47,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Count of A, C, G, T bases across all samples
|
||||||
|
*/
|
||||||
public class BaseCounts extends InfoFieldAnnotation {
|
public class BaseCounts extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
|
||||||
|
*/
|
||||||
public class BaseQualityRankSumTest extends RankSumTest {
|
public class BaseQualityRankSumTest extends RankSumTest {
|
||||||
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,11 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Allele count in genotypes, for each ALT allele, in the same order as listed;
|
||||||
|
* allele frequency, for each ALT allele, in the same order as listed; total number
|
||||||
|
* of alleles in called genotypes.
|
||||||
|
*/
|
||||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,23 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total (unfiltered) depth over all samples.
|
||||||
|
*
|
||||||
|
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||||
|
* at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
|
||||||
|
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||||
|
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||||
|
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||||
|
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||||
|
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||||
|
* the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
|
||||||
|
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||||
|
* normally be excluded from the statistical calculations going into GQ and QUAL.
|
||||||
|
*
|
||||||
|
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
|
||||||
|
* -dcov D is N * D
|
||||||
|
*/
|
||||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,25 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The depth of coverage of each VCF allele in this sample.
|
||||||
|
*
|
||||||
|
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
|
||||||
|
* at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
|
||||||
|
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
|
||||||
|
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
|
||||||
|
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
|
||||||
|
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
|
||||||
|
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
|
||||||
|
* the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
|
||||||
|
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
|
||||||
|
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
|
||||||
|
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
|
||||||
|
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
|
||||||
|
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
|
||||||
|
* assumptions about the underlying genotype on it</b>; instead, the genotype likelihoods (PLs) are what
|
||||||
|
* determine the genotype calls (see below).
|
||||||
|
*/
|
||||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static String REF_ALLELE = "REF";
|
private static String REF_ALLELE = "REF";
|
||||||
|
|
|
||||||
|
|
@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
|
||||||
|
* being seen on only the forward or only the reverse strand) in the reads. More bias is
|
||||||
|
* indicative of false positive calls.
|
||||||
|
*/
|
||||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
|
||||||
|
*/
|
||||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
|
||||||
|
|
@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
|
||||||
|
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
|
||||||
|
*/
|
||||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Phred-scaled p-value of the genotype-based (using GT field) Hardy-Weinberg test for disequilibrium
|
||||||
|
*/
|
||||||
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
|
||||||
|
*/
|
||||||
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
|
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private boolean ANNOTATE_INDELS = true;
|
private boolean ANNOTATE_INDELS = true;
|
||||||
|
|
|
||||||
|
|
@ -17,14 +17,15 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: rpoplin
|
|
||||||
* Date: 5/16/11
|
|
||||||
*/
|
|
||||||
|
|
||||||
// A set of annotations calculated directly from the GLs
|
/**
|
||||||
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
|
* Likelihood-based (using PL field) test for inbreeding among samples.
|
||||||
|
*
|
||||||
|
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
|
||||||
|
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
|
||||||
|
* more information.
|
||||||
|
*/
|
||||||
|
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
|
|
||||||
|
|
@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Rough category of indel type (insertion, deletion, multi-allelic, other)
|
||||||
* User: delangel
|
|
||||||
* Date: Mar 11, 2011
|
|
||||||
* Time: 11:47:33 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Triplet annotation: fraction of MAPQ == 0, MAPQ < 10, and count of all mapped reads
|
||||||
|
*/
|
||||||
public class LowMQ extends InfoFieldAnnotation {
|
public class LowMQ extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
|
||||||
|
*/
|
||||||
public class MappingQualityRankSumTest extends RankSumTest {
|
public class MappingQualityRankSumTest extends RankSumTest {
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
|
||||||
|
|
|
||||||
|
|
@ -19,6 +19,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Total count across all samples of mapping quality zero reads
|
||||||
|
*/
|
||||||
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -44,11 +44,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Count for each sample of mapping quality zero reads
|
||||||
* User: asivache
|
|
||||||
* Date: Feb 4, 2011
|
|
||||||
* Time: 6:46:25 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
public class MappingQualityZeroBySample extends GenotypeAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
public Map<String, Object> annotate(RefMetaDataTracker tracker,
|
||||||
|
|
|
||||||
|
|
@ -17,8 +17,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fraction of all reads across samples that have mapping quality zero
|
||||||
|
*/
|
||||||
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -17,11 +17,8 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* The number of N bases, counting only SOLiD data
|
||||||
* User: rpoplin
|
|
||||||
* Date: 5/16/11
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class NBaseCount extends InfoFieldAnnotation {
|
public class NBaseCount extends InfoFieldAnnotation {
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if( stratifiedContexts.size() == 0 )
|
if( stratifiedContexts.size() == 0 )
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -15,7 +16,11 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
|
||||||
|
*
|
||||||
|
* Low scores are indicative of false positive calls and artifacts.
|
||||||
|
*/
|
||||||
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Root Mean Square of the mapping quality of the reads across all samples.
|
||||||
|
*/
|
||||||
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Abstract root for all RankSum based annotations
|
||||||
|
*/
|
||||||
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
|
||||||
static final boolean DEBUG = false;
|
static final boolean DEBUG = false;
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -47,12 +48,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Unsupported
|
||||||
* User: asivache
|
|
||||||
* Date: Feb 4, 2011
|
|
||||||
* Time: 3:59:27 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
|
||||||
|
|
||||||
private static String REF_ALLELE = "REF";
|
private static String REF_ALLELE = "REF";
|
||||||
|
|
|
||||||
|
|
@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads this is indicative of error).
|
||||||
* User: rpoplin
|
|
||||||
* Date: 3/30/11
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class ReadPosRankSumTest extends RankSumTest {
|
public class ReadPosRankSumTest extends RankSumTest {
|
||||||
|
|
||||||
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
|
||||||
|
|
|
||||||
|
|
@ -15,8 +15,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SB annotation value by depth of alt containing samples
|
||||||
|
*/
|
||||||
public class SBByDepth extends AnnotationByDepth {
|
public class SBByDepth extends AnnotationByDepth {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
@ -26,7 +27,7 @@ public class SBByDepth extends AnnotationByDepth {
|
||||||
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
|
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
|
||||||
|
|
||||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() == 0 )
|
if ( genotypes == null || genotypes.size() == 0 )
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,9 @@ import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* List all of the samples in the info field
|
||||||
|
*/
|
||||||
public class SampleList extends InfoFieldAnnotation {
|
public class SampleList extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -24,7 +24,9 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -46,134 +45,522 @@ import java.util.*;
|
||||||
* (http://snpeff.sourceforge.net/).
|
* (http://snpeff.sourceforge.net/).
|
||||||
*
|
*
|
||||||
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
|
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
|
||||||
* output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
|
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
|
||||||
* and adds annotations on that effect.
|
* and adds annotations on that effect.
|
||||||
*
|
*
|
||||||
* The possible biological effects and their associated impacts are defined in the class:
|
|
||||||
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
|
|
||||||
*
|
|
||||||
* @author David Roazen
|
* @author David Roazen
|
||||||
*/
|
*/
|
||||||
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
// SnpEff annotation key names:
|
private static Logger logger = Logger.getLogger(SnpEff.class);
|
||||||
public static final String GENE_ID_KEY = "GENE_ID";
|
|
||||||
public static final String GENE_NAME_KEY = "GENE_NAME";
|
// We refuse to parse SnpEff output files generated by unsupported versions, or
|
||||||
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
|
// lacking a SnpEff version number in the VCF header:
|
||||||
public static final String EXON_ID_KEY = "EXON_ID";
|
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
||||||
public static final String EXON_RANK_KEY = "EXON_RANK";
|
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
||||||
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
|
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
|
||||||
public static final String EFFECT_KEY = "EFFECT";
|
|
||||||
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
|
// When we write the SnpEff version number and command line to the output VCF, we change
|
||||||
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
|
// the key name slightly so that the output VCF won't be confused in the future for an
|
||||||
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
|
// output file produced by SnpEff directly:
|
||||||
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
|
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
|
||||||
public static final String CODON_NUM_KEY = "CODON_NUM";
|
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
|
||||||
public static final String CDS_SIZE_KEY = "CDS_SIZE";
|
|
||||||
|
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
|
||||||
|
// field annotation with the key EFF:
|
||||||
|
public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
|
||||||
|
public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
|
||||||
|
public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
|
||||||
|
|
||||||
|
// Key names for the INFO field annotations we will add to each record, along
|
||||||
|
// with parsing-related information:
|
||||||
|
public enum InfoFieldKey {
|
||||||
|
EFFECT_KEY ("SNPEFF_EFFECT", -1),
|
||||||
|
IMPACT_KEY ("SNPEFF_IMPACT", 0),
|
||||||
|
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
|
||||||
|
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
|
||||||
|
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
|
||||||
|
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
|
||||||
|
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
|
||||||
|
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
|
||||||
|
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
|
||||||
|
|
||||||
|
// Actual text of the key
|
||||||
|
private final String keyName;
|
||||||
|
|
||||||
|
// Index within the effect metadata subfields from the SnpEff EFF annotation
|
||||||
|
// where each key's associated value can be found during parsing.
|
||||||
|
private final int fieldIndex;
|
||||||
|
|
||||||
|
InfoFieldKey ( String keyName, int fieldIndex ) {
|
||||||
|
this.keyName = keyName;
|
||||||
|
this.fieldIndex = fieldIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getKeyName() {
|
||||||
|
return keyName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getFieldIndex() {
|
||||||
|
return fieldIndex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
|
||||||
|
// are validated against this list.
|
||||||
|
public enum EffectType {
|
||||||
|
// High-impact effects:
|
||||||
|
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
|
||||||
|
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
|
||||||
|
START_LOST (EffectFunctionalClass.NONE, false),
|
||||||
|
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
|
||||||
|
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
|
||||||
|
EXON_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
|
STOP_LOST (EffectFunctionalClass.NONE, false),
|
||||||
|
|
||||||
|
// Moderate-impact effects:
|
||||||
|
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
|
||||||
|
CODON_CHANGE (EffectFunctionalClass.NONE, false),
|
||||||
|
CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||||
|
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
|
||||||
|
CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||||
|
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
|
||||||
|
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
|
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
|
||||||
|
|
||||||
|
// Low-impact effects:
|
||||||
|
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
|
||||||
|
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||||
|
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
|
||||||
|
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||||
|
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
|
||||||
|
START_GAINED (EffectFunctionalClass.NONE, false),
|
||||||
|
|
||||||
|
// Modifiers:
|
||||||
|
NONE (EffectFunctionalClass.NONE, true),
|
||||||
|
CHROMOSOME (EffectFunctionalClass.NONE, true),
|
||||||
|
INTERGENIC (EffectFunctionalClass.NONE, true),
|
||||||
|
UPSTREAM (EffectFunctionalClass.NONE, true),
|
||||||
|
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
|
||||||
|
CDS (EffectFunctionalClass.NONE, true),
|
||||||
|
GENE (EffectFunctionalClass.NONE, true),
|
||||||
|
TRANSCRIPT (EffectFunctionalClass.NONE, true),
|
||||||
|
EXON (EffectFunctionalClass.NONE, true),
|
||||||
|
INTRON (EffectFunctionalClass.NONE, true),
|
||||||
|
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
|
||||||
|
DOWNSTREAM (EffectFunctionalClass.NONE, true),
|
||||||
|
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||||
|
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
|
||||||
|
REGULATION (EffectFunctionalClass.NONE, true),
|
||||||
|
CUSTOM (EffectFunctionalClass.NONE, true),
|
||||||
|
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
|
||||||
|
|
||||||
|
private final EffectFunctionalClass functionalClass;
|
||||||
|
private final boolean isModifier;
|
||||||
|
|
||||||
|
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
|
||||||
|
this.functionalClass = functionalClass;
|
||||||
|
this.isModifier = isModifier;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EffectFunctionalClass getFunctionalClass() {
|
||||||
|
return functionalClass;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isModifier() {
|
||||||
|
return isModifier;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
|
||||||
|
// classifying some of the LOW impact effects as MODIFIERs.
|
||||||
|
public enum EffectImpact {
|
||||||
|
MODIFIER (0),
|
||||||
|
LOW (1),
|
||||||
|
MODERATE (2),
|
||||||
|
HIGH (3);
|
||||||
|
|
||||||
|
private final int severityRating;
|
||||||
|
|
||||||
|
EffectImpact ( int severityRating ) {
|
||||||
|
this.severityRating = severityRating;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHigherImpactThan ( EffectImpact other ) {
|
||||||
|
return this.severityRating > other.severityRating;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSameImpactAs ( EffectImpact other ) {
|
||||||
|
return this.severityRating == other.severityRating;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
|
||||||
|
public enum EffectCoding {
|
||||||
|
CODING,
|
||||||
|
NON_CODING,
|
||||||
|
UNKNOWN
|
||||||
|
}
|
||||||
|
|
||||||
|
// We assign a functional class to each SnpEff effect.
|
||||||
|
public enum EffectFunctionalClass {
|
||||||
|
NONE (0),
|
||||||
|
SILENT (1),
|
||||||
|
MISSENSE (2),
|
||||||
|
NONSENSE (3);
|
||||||
|
|
||||||
|
private final int priority;
|
||||||
|
|
||||||
|
EffectFunctionalClass ( int priority ) {
|
||||||
|
this.priority = priority;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHigherPriorityThan ( EffectFunctionalClass other ) {
|
||||||
|
return this.priority > other.priority;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||||
|
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
||||||
|
// without providing a SnpEff rod via --snpEffFile):
|
||||||
|
validateRodBinding(walker.getSnpEffRodBinding());
|
||||||
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
|
|
||||||
|
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
|
||||||
|
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
|
||||||
|
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
||||||
|
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||||
|
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
|
||||||
|
|
||||||
|
checkSnpEffVersion(snpEffVersionLine);
|
||||||
|
checkSnpEffCommandLine(snpEffCommandLine);
|
||||||
|
|
||||||
|
// If everything looks ok, add the SnpEff version number and command-line header lines to the
|
||||||
|
// header of the VCF output file, changing the key names so that our output file won't be
|
||||||
|
// mistaken in the future for a SnpEff output file:
|
||||||
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
|
||||||
|
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||||
|
}
|
||||||
|
|
||||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||||
RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
|
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||||
validateRodBinding(snpEffRodBinding);
|
|
||||||
|
|
||||||
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
|
// Get only SnpEff records that start at this locus, not merely span it:
|
||||||
|
List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
|
||||||
|
|
||||||
// Add only annotations for one of the most biologically-significant effects as defined in
|
// Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
|
||||||
// the SnpEffConstants class:
|
// If there is more than one such record, we only need to pick the first one, since the biological
|
||||||
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
|
// effects will be the same across all such records:
|
||||||
|
VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
|
||||||
if ( mostSignificantEffect == null ) {
|
if ( matchingRecord == null ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
return generateAnnotations(mostSignificantEffect);
|
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
|
||||||
|
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
|
||||||
|
if ( effects.size() == 0 ) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Add only annotations for one of the most biologically-significant effects from this set:
|
||||||
|
SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
|
||||||
|
return mostSignificantEffect.getAnnotations();
|
||||||
}
|
}
|
||||||
|
|
||||||
private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
|
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
|
||||||
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
|
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
|
||||||
throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
|
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
|
||||||
"as a rodbinding on the command line, but no SnpEff rodbinding was found.");
|
"as a rodbinding on the command line via the --snpEffFile option, but " +
|
||||||
|
"no SnpEff rodbinding was found.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
|
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
|
||||||
SnpEffFeature mostSignificantEffect = null;
|
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
||||||
|
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||||
|
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
|
||||||
|
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
|
||||||
|
}
|
||||||
|
|
||||||
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
|
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
|
||||||
|
|
||||||
|
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
|
||||||
|
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
|
||||||
|
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
|
||||||
|
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
|
||||||
|
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||||
|
"input file, which should be added by all supported versions of SnpEff (" +
|
||||||
|
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isSupportedSnpEffVersion ( String versionString ) {
|
||||||
|
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
|
||||||
|
if ( supportedVersion.equals(versionString) ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
|
||||||
|
for ( VariantContext snpEffRecord : snpEffRecords ) {
|
||||||
|
if ( snpEffRecord.hasSameAlternateAllelesAs(vc) && snpEffRecord.getReference().equals(vc.getReference()) ) {
|
||||||
|
return snpEffRecord;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
|
||||||
|
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
|
||||||
|
|
||||||
|
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
|
||||||
|
if ( effectFieldValue == null ) {
|
||||||
|
return parsedEffects;
|
||||||
|
}
|
||||||
|
|
||||||
|
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
|
||||||
|
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
|
||||||
|
List<String> individualEffects;
|
||||||
|
if ( effectFieldValue instanceof List ) {
|
||||||
|
individualEffects = (List<String>)effectFieldValue;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
individualEffects = Arrays.asList((String)effectFieldValue);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( String effectString : individualEffects ) {
|
||||||
|
String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);
|
||||||
|
|
||||||
|
if ( effectNameAndMetadata.length != 2 ) {
|
||||||
|
logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
|
||||||
|
snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String effectName = effectNameAndMetadata[0];
|
||||||
|
String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
|
||||||
|
|
||||||
|
SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);
|
||||||
|
|
||||||
|
if ( parsedEffect.isWellFormed() ) {
|
||||||
|
parsedEffects.add(parsedEffect);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
|
||||||
|
snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return parsedEffects;
|
||||||
|
}
|
||||||
|
|
||||||
|
private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
|
||||||
|
SnpEffEffect mostSignificantEffect = null;
|
||||||
|
|
||||||
|
for ( SnpEffEffect effect : effects ) {
|
||||||
if ( mostSignificantEffect == null ||
|
if ( mostSignificantEffect == null ||
|
||||||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
|
effect.isHigherImpactThan(mostSignificantEffect) ) {
|
||||||
|
|
||||||
mostSignificantEffect = snpEffFeature;
|
mostSignificantEffect = effect;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return mostSignificantEffect;
|
return mostSignificantEffect;
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
|
|
||||||
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
|
|
||||||
|
|
||||||
if ( mostSignificantEffect.hasGeneID() )
|
|
||||||
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
|
|
||||||
if ( mostSignificantEffect.hasGeneName() )
|
|
||||||
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
|
|
||||||
if ( mostSignificantEffect.hasTranscriptID() )
|
|
||||||
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
|
|
||||||
if ( mostSignificantEffect.hasExonID() )
|
|
||||||
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
|
|
||||||
if ( mostSignificantEffect.hasExonRank() )
|
|
||||||
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
|
|
||||||
if ( mostSignificantEffect.isNonCodingGene() )
|
|
||||||
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
|
|
||||||
|
|
||||||
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
|
|
||||||
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
|
|
||||||
if ( mostSignificantEffect.hasEffectExtraInformation() )
|
|
||||||
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
|
|
||||||
|
|
||||||
if ( mostSignificantEffect.hasOldAndNewAA() )
|
|
||||||
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
|
|
||||||
if ( mostSignificantEffect.hasOldAndNewCodon() )
|
|
||||||
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
|
|
||||||
if ( mostSignificantEffect.hasCodonNum() )
|
|
||||||
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
|
|
||||||
if ( mostSignificantEffect.hasCdsSize() )
|
|
||||||
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
|
|
||||||
|
|
||||||
return annotations;
|
|
||||||
}
|
|
||||||
|
|
||||||
public List<String> getKeyNames() {
|
public List<String> getKeyNames() {
|
||||||
return Arrays.asList( GENE_ID_KEY,
|
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
|
||||||
GENE_NAME_KEY,
|
InfoFieldKey.IMPACT_KEY.getKeyName(),
|
||||||
TRANSCRIPT_ID_KEY,
|
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
|
||||||
EXON_ID_KEY,
|
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
|
||||||
EXON_RANK_KEY,
|
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
|
||||||
WITHIN_NON_CODING_GENE_KEY,
|
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
|
||||||
EFFECT_KEY,
|
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
|
||||||
EFFECT_IMPACT_KEY,
|
InfoFieldKey.EXON_ID_KEY.getKeyName(),
|
||||||
EFFECT_EXTRA_INFORMATION_KEY,
|
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
|
||||||
OLD_NEW_AA_KEY,
|
|
||||||
OLD_NEW_CODON_KEY,
|
|
||||||
CODON_NUM_KEY,
|
|
||||||
CDS_SIZE_KEY
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
return Arrays.asList(
|
return Arrays.asList(
|
||||||
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||||
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
|
||||||
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
|
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
||||||
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
|
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
|
||||||
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
|
||||||
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
|
||||||
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
|
|
||||||
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper class to parse, validate, and store a single SnpEff effect and its metadata.
|
||||||
|
*/
|
||||||
|
protected static class SnpEffEffect {
|
||||||
|
private EffectType effect;
|
||||||
|
private EffectImpact impact;
|
||||||
|
private String codonChange;
|
||||||
|
private String aminoAcidChange;
|
||||||
|
private String geneName;
|
||||||
|
private String geneBiotype;
|
||||||
|
private EffectCoding coding;
|
||||||
|
private String transcriptID;
|
||||||
|
private String exonID;
|
||||||
|
|
||||||
|
private String parseError = null;
|
||||||
|
private boolean isWellFormed = true;
|
||||||
|
|
||||||
|
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
|
||||||
|
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
|
||||||
|
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
|
||||||
|
|
||||||
|
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
|
||||||
|
// errors come after warnings, not vice versa:
|
||||||
|
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
|
||||||
|
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
|
||||||
|
|
||||||
|
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
|
||||||
|
|
||||||
|
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
|
||||||
|
parseEffectName(effectName);
|
||||||
|
parseEffectMetadata(effectMetadata);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseEffectName ( String effectName ) {
|
||||||
|
try {
|
||||||
|
effect = EffectType.valueOf(effectName);
|
||||||
|
}
|
||||||
|
catch ( IllegalArgumentException e ) {
|
||||||
|
parseError(String.format("%s is not a recognized effect type", effectName));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseEffectMetadata ( String[] effectMetadata ) {
|
||||||
|
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
|
||||||
|
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
|
||||||
|
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
|
||||||
|
}
|
||||||
|
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
|
||||||
|
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
|
||||||
|
EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( effect != null && effect.isModifier() ) {
|
||||||
|
impact = EffectImpact.MODIFIER;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
try {
|
||||||
|
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
|
||||||
|
}
|
||||||
|
catch ( IllegalArgumentException e ) {
|
||||||
|
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
|
||||||
|
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
|
||||||
|
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
|
||||||
|
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
|
||||||
|
|
||||||
|
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
|
||||||
|
try {
|
||||||
|
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
|
||||||
|
}
|
||||||
|
catch ( IllegalArgumentException e ) {
|
||||||
|
parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
coding = EffectCoding.UNKNOWN;
|
||||||
|
}
|
||||||
|
|
||||||
|
transcriptID = effectMetadata[InfoFieldKey.TRANSCRIPT_ID_KEY.getFieldIndex()];
|
||||||
|
exonID = effectMetadata[InfoFieldKey.EXON_ID_KEY.getFieldIndex()];
|
||||||
|
}
|
||||||
|
|
||||||
|
private void parseError ( String message ) {
|
||||||
|
isWellFormed = false;
|
||||||
|
|
||||||
|
// Cache only the first error encountered:
|
||||||
|
if ( parseError == null ) {
|
||||||
|
parseError = message;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isWellFormed() {
|
||||||
|
return isWellFormed;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getParseError() {
|
||||||
|
return parseError == null ? "" : parseError;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isCoding() {
|
||||||
|
return coding == EffectCoding.CODING;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isHigherImpactThan ( SnpEffEffect other ) {
|
||||||
|
// If one effect is within a coding gene and the other is not, the effect that is
|
||||||
|
// within the coding gene has higher impact:
|
||||||
|
|
||||||
|
if ( isCoding() && ! other.isCoding() ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if ( ! isCoding() && other.isCoding() ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
|
||||||
|
// of the effects themselves. Effects with the same impact are tie-broken using the
|
||||||
|
// functional class of the effect:
|
||||||
|
|
||||||
|
if ( impact.isHigherImpactThan(other.impact) ) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
else if ( impact.isSameImpactAs(other.impact) ) {
|
||||||
|
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Map<String, Object> getAnnotations() {
|
||||||
|
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));
|
||||||
|
|
||||||
|
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
|
||||||
|
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
|
||||||
|
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
|
||||||
|
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
|
||||||
|
|
||||||
|
return annotations;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
|
||||||
|
// Only add annotations for keys associated with non-empty values:
|
||||||
|
if ( keyValue != null && keyValue.trim().length() > 0 ) {
|
||||||
|
annotations.put(keyName, keyValue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -17,6 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fraction of reads containing spanning deletions at this site.
|
||||||
|
*/
|
||||||
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
@ -19,12 +20,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Counts of bases from SLX, 454, and SOLiD at this site
|
||||||
* User: delangel
|
|
||||||
* Date: 6/29/11
|
|
||||||
* Time: 3:14 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
private String nSLX = "NumSLX";
|
private String nSLX = "NumSLX";
|
||||||
private String n454 ="Num454";
|
private String n454 ="Num454";
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||||
|
|
@ -86,14 +85,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
|
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The INFO field will be annotated with information on the most biologically-significant effect
|
* The INFO field will be annotated with information on the most biologically-significant effect
|
||||||
* listed in the SnpEff output file for each variant.
|
* listed in the SnpEff output file for each variant.
|
||||||
*/
|
*/
|
||||||
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
|
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
|
||||||
public RodBinding<SnpEffFeature> snpEffFile;
|
public RodBinding<VariantContext> snpEffFile;
|
||||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
|
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
||||||
|
|
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
|
||||||
protected boolean indelsOnly = false;
|
protected boolean indelsOnly = false;
|
||||||
|
|
||||||
|
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
|
||||||
|
public String familyStr = null;
|
||||||
|
|
||||||
|
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
|
||||||
|
public double minGenotypeQualityP = 0.0;
|
||||||
|
|
||||||
private VariantAnnotatorEngine engine;
|
private VariantAnnotatorEngine engine;
|
||||||
|
|
||||||
private Collection<VariantContext> indelBufferContext;
|
private Collection<VariantContext> indelBufferContext;
|
||||||
|
|
@ -203,9 +209,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( USE_ALL_ANNOTATIONS )
|
if ( USE_ALL_ANNOTATIONS )
|
||||||
engine = new VariantAnnotatorEngine(this);
|
engine = new VariantAnnotatorEngine(this, getToolkit());
|
||||||
else
|
else
|
||||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
|
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
|
||||||
engine.initializeExpressions(expressionsToUse);
|
engine.initializeExpressions(expressionsToUse);
|
||||||
|
|
||||||
// setup the header fields
|
// setup the header fields
|
||||||
|
|
@ -217,6 +223,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
||||||
hInfo.add(line);
|
hInfo.add(line);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||||
|
|
||||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||||
vcfWriter.writeHeader(vcfHeader);
|
vcfWriter.writeHeader(vcfHeader);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,13 +26,11 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||||
|
|
@ -49,6 +47,7 @@ public class VariantAnnotatorEngine {
|
||||||
|
|
||||||
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||||
private AnnotatorCompatibleWalker walker;
|
private AnnotatorCompatibleWalker walker;
|
||||||
|
private GenomeAnalysisEngine toolkit;
|
||||||
|
|
||||||
private static class VAExpression {
|
private static class VAExpression {
|
||||||
|
|
||||||
|
|
@ -74,16 +73,18 @@ public class VariantAnnotatorEngine {
|
||||||
}
|
}
|
||||||
|
|
||||||
// use this constructor if you want all possible annotations
|
// use this constructor if you want all possible annotations
|
||||||
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
|
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
|
||||||
this.walker = walker;
|
this.walker = walker;
|
||||||
|
this.toolkit = toolkit;
|
||||||
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
|
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
|
||||||
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
|
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
|
||||||
initializeDBs();
|
initializeDBs();
|
||||||
}
|
}
|
||||||
|
|
||||||
// use this constructor if you want to select specific annotations (and/or interfaces)
|
// use this constructor if you want to select specific annotations (and/or interfaces)
|
||||||
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
|
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
|
||||||
this.walker = walker;
|
this.walker = walker;
|
||||||
|
this.toolkit = toolkit;
|
||||||
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
|
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
|
||||||
initializeDBs();
|
initializeDBs();
|
||||||
}
|
}
|
||||||
|
|
@ -113,6 +114,16 @@ public class VariantAnnotatorEngine {
|
||||||
dbAnnotations.put(rod, rod.getName());
|
dbAnnotations.put(rod, rod.getName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
|
||||||
|
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||||
|
annotation.initialize(walker, toolkit, headerLines);
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||||
|
annotation.initialize(walker, toolkit, headerLines);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
|
||||||
|
|
||||||
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -9,7 +8,8 @@ import java.util.List;
|
||||||
public interface AnnotatorCompatibleWalker {
|
public interface AnnotatorCompatibleWalker {
|
||||||
|
|
||||||
// getter methods for various used bindings
|
// getter methods for various used bindings
|
||||||
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
|
public abstract RodBinding<VariantContext> getVariantRodBinding();
|
||||||
|
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
|
||||||
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
||||||
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
||||||
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
|
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
|
||||||
|
|
|
||||||
|
|
@ -24,18 +24,18 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
|
||||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Set;
|
||||||
|
|
||||||
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
||||||
public abstract class VariantAnnotatorAnnotation {
|
public abstract class VariantAnnotatorAnnotation {
|
||||||
// return the INFO keys
|
// return the INFO keys
|
||||||
public abstract List<String> getKeyNames();
|
public abstract List<String> getKeyNames();
|
||||||
|
|
||||||
|
// initialization method (optional for subclasses, and therefore non-abstract)
|
||||||
|
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
|
||||||
}
|
}
|
||||||
|
|
@ -175,21 +175,16 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2);
|
BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2);
|
||||||
// ignore places where we don't have a variant
|
|
||||||
if ( beagleR2Feature == null )
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
|
|
||||||
BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs);
|
BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs);
|
||||||
|
|
||||||
// ignore places where we don't have a variant
|
|
||||||
if ( beagleProbsFeature == null )
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased);
|
BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased);
|
||||||
|
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a variant
|
||||||
if ( beaglePhasedFeature == null )
|
if ( beagleR2Feature == null || beagleProbsFeature == null || beaglePhasedFeature == null)
|
||||||
return 0;
|
{
|
||||||
|
vcfWriter.add(vc_input);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// get reference base for current position
|
// get reference base for current position
|
||||||
byte refByte = ref.getBase();
|
byte refByte = ref.getBase();
|
||||||
|
|
|
||||||
|
|
@ -63,20 +63,32 @@ import java.util.*;
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* One or more bam files (with proper headers) to be analyzed for coverage statistics
|
* One or more bam files (with proper headers) to be analyzed for coverage statistics
|
||||||
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
|
||||||
* </p>
|
* </p>
|
||||||
*
|
* <p>
|
||||||
|
*(Optional) A REFSEQ Rod to aggregate coverage to the gene level
|
||||||
|
* <p>
|
||||||
|
* (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
|
||||||
|
*</p></p>
|
||||||
* <h2>Output</h2>
|
* <h2>Output</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
|
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
|
||||||
|
* </p><p>
|
||||||
* - no suffix: per locus coverage
|
* - no suffix: per locus coverage
|
||||||
|
* </p><p>
|
||||||
* - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
|
* - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
|
||||||
|
* </p><p>
|
||||||
* - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
|
* - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
|
||||||
|
* </p><p>
|
||||||
* - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
|
* - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
|
||||||
|
* </p><p>
|
||||||
* - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
|
* - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
|
||||||
|
* </p><p>
|
||||||
* - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
|
* - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
|
||||||
|
* </p><p>
|
||||||
* - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
|
* - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
|
||||||
|
* </p><p>
|
||||||
* - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
|
* - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
|
||||||
|
* </p><p>
|
||||||
* - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases
|
* - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
|
|
@ -84,7 +96,7 @@ import java.util.*;
|
||||||
* <pre>
|
* <pre>
|
||||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
* -R ref.fasta \
|
* -R ref.fasta \
|
||||||
* -T VariantEval \
|
* -T DepthOfCoverage \
|
||||||
* -o file_name_base \
|
* -o file_name_base \
|
||||||
* -I input_bams.list
|
* -I input_bams.list
|
||||||
* [-geneList refSeq.sorted.txt] \
|
* [-geneList refSeq.sorted.txt] \
|
||||||
|
|
|
||||||
|
|
@ -43,8 +43,10 @@ import java.util.List;
|
||||||
* Generates an alternative reference sequence over the specified interval.
|
* Generates an alternative reference sequence over the specified interval.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
|
||||||
* Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
|
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
|
||||||
|
* Note that if there are multiple variants at a site, it takes the first one seen.
|
||||||
|
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
|
||||||
*
|
*
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,9 @@ import java.io.PrintStream;
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* The output format can be partially controlled using the provided command-line arguments.
|
* The output format can be partially controlled using the provided command-line arguments.
|
||||||
|
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
|
||||||
|
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
|
||||||
|
* separate fasta sequence (named numerically in order).
|
||||||
*
|
*
|
||||||
* <h2>Input</h2>
|
* <h2>Input</h2>
|
||||||
* <p>
|
* <p>
|
||||||
|
|
|
||||||
|
|
@ -23,7 +23,7 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.genotype;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
||||||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
* Time: 6:46:09 PM
|
* Time: 6:46:09 PM
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
public enum DiploidGenotype {
|
enum DiploidGenotype {
|
||||||
AA ('A', 'A'),
|
AA ('A', 'A'),
|
||||||
AC ('A', 'C'),
|
AC ('A', 'C'),
|
||||||
AG ('A', 'G'),
|
AG ('A', 'G'),
|
||||||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
@ -276,8 +275,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
|
||||||
if ( elt.isReducedRead() ) {
|
if ( elt.isReducedRead() ) {
|
||||||
// reduced read representation
|
// reduced read representation
|
||||||
byte qual = elt.getReducedQual();
|
byte qual = elt.getReducedQual();
|
||||||
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
|
if ( BaseUtils.isRegularBase( elt.getBase() )) {
|
||||||
return elt.getReducedCount(); // we added nObs bases here
|
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
|
||||||
|
return elt.getReducedCount(); // we added nObs bases here
|
||||||
|
} else // odd bases or deletions => don't use them
|
||||||
|
return 0;
|
||||||
} else {
|
} else {
|
||||||
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
|
||||||
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
|
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// code for testing purposes
|
// code for testing purposes
|
||||||
//
|
//
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static boolean PRINT_LIKELIHOODS = false;
|
|
||||||
private final static int N_CYCLES = 1;
|
|
||||||
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
|
|
||||||
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
|
|
||||||
private final static boolean COMPARE_TO_GS = false;
|
|
||||||
|
|
||||||
public enum ExactCalculation {
|
|
||||||
N2_GOLD_STANDARD,
|
|
||||||
LINEAR_EXPERIMENTAL
|
|
||||||
}
|
|
||||||
|
|
||||||
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
|
||||||
|
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
|
||||||
|
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
|
||||||
|
|
||||||
private boolean SIMPLE_GREEDY_GENOTYPER = false;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
final private ExactCalculation calcToUse;
|
|
||||||
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
|
||||||
super(UAC, N, logger, verboseWriter);
|
super(UAC, N, logger, verboseWriter);
|
||||||
calcToUse = UAC.EXACT_CALCULATION_TYPE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
public void getLog10PNonRef(RefMetaDataTracker tracker,
|
||||||
|
|
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
Map<String, Genotype> GLs, Set<Allele>alleles,
|
Map<String, Genotype> GLs, Set<Allele>alleles,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
double[] log10AlleleFrequencyPosteriors) {
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final int numAlleles = alleles.size();
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
|
||||||
double[] gsPosteriors;
|
|
||||||
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
|
|
||||||
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
|
|
||||||
|
|
||||||
int idxAA = GenotypeType.AA.ordinal();
|
|
||||||
int idxAB = GenotypeType.AB.ordinal();
|
|
||||||
int idxBB = GenotypeType.BB.ordinal();
|
|
||||||
|
|
||||||
// todo -- remove me after testing
|
|
||||||
if ( N_CYCLES > 1 ) {
|
|
||||||
for ( int i = 0; i < N_CYCLES; i++) {
|
|
||||||
timerGS.restart();
|
|
||||||
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
|
|
||||||
timerGS.stop();
|
|
||||||
|
|
||||||
timerExpt.restart();
|
|
||||||
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
|
|
||||||
timerExpt.stop();
|
|
||||||
}
|
|
||||||
|
|
||||||
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
|
|
||||||
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
|
|
||||||
}
|
|
||||||
|
|
||||||
int lastK = -1;
|
|
||||||
|
|
||||||
int numAlleles = alleles.size();
|
|
||||||
|
|
||||||
int idxDiag = numAlleles;
|
int idxDiag = numAlleles;
|
||||||
int incr = numAlleles - 1;
|
int incr = numAlleles - 1;
|
||||||
|
|
||||||
double[][] posteriorCache = new double[numAlleles-1][];
|
|
||||||
double[] bestAFguess = new double[numAlleles-1];
|
|
||||||
|
|
||||||
for (int k=1; k < numAlleles; k++) {
|
for (int k=1; k < numAlleles; k++) {
|
||||||
// multi-allelic approximation, part 1: Ideally
|
// multi-allelic approximation, part 1: Ideally
|
||||||
// for each alt allele compute marginal (suboptimal) posteriors -
|
// for each alt allele compute marginal (suboptimal) posteriors -
|
||||||
|
|
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
|
||||||
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
|
||||||
|
|
||||||
idxAA = 0;
|
final int idxAA = 0;
|
||||||
idxAB = k;
|
final int idxAB = k;
|
||||||
// yy is always element on the diagonal.
|
// yy is always element on the diagonal.
|
||||||
// 2 alleles: BB element 2
|
// 2 alleles: BB element 2
|
||||||
// 3 alleles: BB element 3. CC element 5
|
// 3 alleles: BB element 3. CC element 5
|
||||||
// 4 alleles:
|
// 4 alleles:
|
||||||
idxBB = idxDiag;
|
final int idxBB = idxDiag;
|
||||||
idxDiag += incr--;
|
idxDiag += incr--;
|
||||||
|
|
||||||
// todo - possible cleanup
|
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
||||||
switch ( calcToUse ) {
|
|
||||||
case N2_GOLD_STANDARD:
|
|
||||||
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
break;
|
|
||||||
case LINEAR_EXPERIMENTAL:
|
|
||||||
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (numAlleles > 2) {
|
if (numAlleles > 2) {
|
||||||
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
|
||||||
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
|
||||||
|
|
@ -153,47 +100,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
|
||||||
|
|
||||||
}
|
}
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
// todo -- REMOVE ME AFTER TESTING
|
|
||||||
if ( COMPARE_TO_GS ) {
|
|
||||||
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
|
|
||||||
|
|
||||||
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
|
|
||||||
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
|
|
||||||
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
|
|
||||||
|
|
||||||
if ( ! eq || PRINT_LIKELIHOODS ) {
|
|
||||||
System.out.printf("----------------------------------------%n");
|
|
||||||
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
|
|
||||||
double x = log10AlleleFrequencyPosteriors[k];
|
|
||||||
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
|
|
||||||
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
|
|
||||||
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
|
|
||||||
}
|
|
||||||
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
|
|
||||||
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final double[][] getGLs(Map<String, Genotype> GLs) {
|
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
|
||||||
double[][] genotypeLikelihoods = new double[GLs.size()+1][];
|
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
|
||||||
|
|
||||||
int j = 0;
|
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
|
||||||
for ( Genotype sample : GLs.values() ) {
|
for ( Genotype sample : GLs.values() ) {
|
||||||
j++;
|
|
||||||
|
|
||||||
if ( sample.hasLikelihoods() ) {
|
if ( sample.hasLikelihoods() ) {
|
||||||
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
double[] gls = sample.getLikelihoods().getAsVector();
|
||||||
genotypeLikelihoods[j] = sample.getLikelihoods().getAsVector();
|
|
||||||
|
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
|
||||||
|
genotypeLikelihoods.add(gls);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return genotypeLikelihoods;
|
return genotypeLikelihoods;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
// -------------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Linearized, ~O(N), implementation.
|
// Linearized, ~O(N), implementation.
|
||||||
|
|
@ -237,90 +162,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// now with banding
|
|
||||||
public int linearExactBanded(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors) {
|
|
||||||
throw new NotImplementedException();
|
|
||||||
// final int numSamples = GLs.size();
|
|
||||||
// final int numChr = 2*numSamples;
|
|
||||||
// final double[][] genotypeLikelihoods = getGLs(GLs);
|
|
||||||
//
|
|
||||||
// final ExactACCache logY = new ExactACCache(numSamples+1);
|
|
||||||
// logY.getkMinus0()[0] = 0.0; // the zero case
|
|
||||||
//
|
|
||||||
// double maxLog10L = Double.NEGATIVE_INFINITY;
|
|
||||||
// boolean done = false;
|
|
||||||
// int lastK = -1;
|
|
||||||
// final int BAND_SIZE = 10;
|
|
||||||
//
|
|
||||||
// for (int k=0; k <= numChr && ! done; k++ ) {
|
|
||||||
// final double[] kMinus0 = logY.getkMinus0();
|
|
||||||
// int jStart = Math.max(k - BAND_SIZE, 1);
|
|
||||||
// int jStop = Math.min(k + BAND_SIZE, numSamples);
|
|
||||||
//
|
|
||||||
// if ( k == 0 ) { // special case for k = 0
|
|
||||||
// for ( int j=1; j <= numSamples; j++ ) {
|
|
||||||
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
|
|
||||||
// }
|
|
||||||
// } else { // k > 0
|
|
||||||
// final double[] kMinus1 = logY.getkMinus1();
|
|
||||||
// final double[] kMinus2 = logY.getkMinus2();
|
|
||||||
// Arrays.fill(kMinus0,0);
|
|
||||||
//
|
|
||||||
// for ( int j = jStart; j <= jStop; j++ ) {
|
|
||||||
// final double[] gl = genotypeLikelihoods[j];
|
|
||||||
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
|
|
||||||
//
|
|
||||||
// double aa = Double.NEGATIVE_INFINITY;
|
|
||||||
// double ab = Double.NEGATIVE_INFINITY;
|
|
||||||
// if (k < 2*j-1)
|
|
||||||
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
|
|
||||||
//
|
|
||||||
// if (k < 2*j)
|
|
||||||
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
|
|
||||||
//
|
|
||||||
// double log10Max;
|
|
||||||
// if (k > 1) {
|
|
||||||
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
|
|
||||||
// log10Max = approximateLog10SumLog10(aa, ab, bb);
|
|
||||||
// } else {
|
|
||||||
// // we know we aren't considering the BB case, so we can use an optimized log10 function
|
|
||||||
// log10Max = approximateLog10SumLog10(aa, ab);
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // finally, update the L(j,k) value
|
|
||||||
// kMinus0[j] = log10Max - logDenominator;
|
|
||||||
//
|
|
||||||
// String offset = Utils.dupString(' ',k);
|
|
||||||
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// // update the posteriors vector
|
|
||||||
// final double log10LofK = kMinus0[jStop];
|
|
||||||
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
|
|
||||||
//
|
|
||||||
// // can we abort early?
|
|
||||||
// lastK = k;
|
|
||||||
// maxLog10L = Math.max(maxLog10L, log10LofK);
|
|
||||||
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
|
|
||||||
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
|
|
||||||
// done = true;
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// logY.rotate();
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// return lastK;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int linearExact(Map<String, Genotype> GLs,
|
public int linearExact(Map<String, Genotype> GLs,
|
||||||
double[] log10AlleleFrequencyPriors,
|
double[] log10AlleleFrequencyPriors,
|
||||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
||||||
final int numSamples = GLs.size();
|
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||||
|
final int numSamples = genotypeLikelihoods.size()-1;
|
||||||
final int numChr = 2*numSamples;
|
final int numChr = 2*numSamples;
|
||||||
final double[][] genotypeLikelihoods = getGLs(GLs);
|
|
||||||
|
|
||||||
final ExactACCache logY = new ExactACCache(numSamples+1);
|
final ExactACCache logY = new ExactACCache(numSamples+1);
|
||||||
logY.getkMinus0()[0] = 0.0; // the zero case
|
logY.getkMinus0()[0] = 0.0; // the zero case
|
||||||
|
|
@ -334,14 +181,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
if ( k == 0 ) { // special case for k = 0
|
if ( k == 0 ) { // special case for k = 0
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
|
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
|
||||||
}
|
}
|
||||||
} else { // k > 0
|
} else { // k > 0
|
||||||
final double[] kMinus1 = logY.getkMinus1();
|
final double[] kMinus1 = logY.getkMinus1();
|
||||||
final double[] kMinus2 = logY.getkMinus2();
|
final double[] kMinus2 = logY.getkMinus2();
|
||||||
|
|
||||||
for ( int j=1; j <= numSamples; j++ ) {
|
for ( int j=1; j <= numSamples; j++ ) {
|
||||||
final double[] gl = genotypeLikelihoods[j];
|
final double[] gl = genotypeLikelihoods.get(j);
|
||||||
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
||||||
|
|
||||||
double aa = Double.NEGATIVE_INFINITY;
|
double aa = Double.NEGATIVE_INFINITY;
|
||||||
|
|
@ -434,10 +281,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
if ( !vc.isVariant() )
|
if ( !vc.isVariant() )
|
||||||
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
|
||||||
|
|
||||||
boolean multiAllelicRecord = false;
|
|
||||||
|
|
||||||
if (vc.getAlternateAlleles().size() > 1)
|
|
||||||
multiAllelicRecord = true;
|
|
||||||
|
|
||||||
Map<String, Genotype> GLs = vc.getGenotypes();
|
Map<String, Genotype> GLs = vc.getGenotypes();
|
||||||
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
|
||||||
|
|
@ -454,7 +297,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
pathMetricArray[0][0] = 0.0;
|
pathMetricArray[0][0] = 0.0;
|
||||||
|
|
||||||
// todo = can't deal with optimal dynamic programming solution with multiallelic records
|
// todo = can't deal with optimal dynamic programming solution with multiallelic records
|
||||||
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
|
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||||
sampleIndices.addAll(GLs.keySet());
|
sampleIndices.addAll(GLs.keySet());
|
||||||
sampleIdx = GLs.size();
|
sampleIdx = GLs.size();
|
||||||
}
|
}
|
||||||
|
|
@ -465,6 +308,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
||||||
|
|
||||||
|
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
|
||||||
|
//System.out.print(sample.getKey()+":");
|
||||||
|
//for (int k=0; k < likelihoods.length; k++)
|
||||||
|
// System.out.format("%4.2f ",likelihoods[k]);
|
||||||
|
//System.out.println();
|
||||||
|
// all likelihoods are essentially the same: skip this sample and will later on force no call.
|
||||||
|
//sampleIdx++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
sampleIndices.add(sample.getKey());
|
sampleIndices.add(sample.getKey());
|
||||||
|
|
||||||
for (int k=0; k <= AFofMaxLikelihood; k++) {
|
for (int k=0; k <= AFofMaxLikelihood; k++) {
|
||||||
|
|
@ -504,22 +358,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
Genotype g = GLs.get(sample);
|
Genotype g = GLs.get(sample);
|
||||||
if ( !g.hasLikelihoods() )
|
if ( !g.hasLikelihoods() )
|
||||||
continue;
|
continue;
|
||||||
|
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
|
||||||
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
|
// and will add no-call genotype to GL's in a second pass
|
||||||
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
|
||||||
else {
|
|
||||||
int newIdx = tracebackArray[k][startIdx];
|
|
||||||
bestGTguess = startIdx - newIdx;
|
|
||||||
startIdx = newIdx;
|
|
||||||
}
|
|
||||||
|
|
||||||
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
|
|
||||||
double qual = Double.NEGATIVE_INFINITY;
|
double qual = Double.NEGATIVE_INFINITY;
|
||||||
double[] likelihoods = g.getLikelihoods().getAsVector();
|
double[] likelihoods = g.getLikelihoods().getAsVector();
|
||||||
|
|
||||||
|
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
|
||||||
|
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int newIdx = tracebackArray[k][startIdx];;
|
||||||
|
bestGTguess = startIdx - newIdx;
|
||||||
|
startIdx = newIdx;
|
||||||
|
}
|
||||||
|
|
||||||
/* System.out.format("Sample: %s GL:",sample);
|
/* System.out.format("Sample: %s GL:",sample);
|
||||||
for (int i=0; i < likelihoods.length; i++)
|
for (int i=0; i < likelihoods.length; i++)
|
||||||
System.out.format("%1.4f ",likelihoods[i]);
|
System.out.format("%1.4f, ",likelihoods[i]);
|
||||||
*/
|
*/
|
||||||
|
|
||||||
for (int i=0; i < likelihoods.length; i++) {
|
for (int i=0; i < likelihoods.length; i++) {
|
||||||
|
|
@ -570,83 +427,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return calls;
|
|
||||||
}
|
|
||||||
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Gold standard, but O(N^2), implementation.
|
|
||||||
//
|
|
||||||
// TODO -- remove me for clarity in this code
|
|
||||||
//
|
|
||||||
// -------------------------------------------------------------------------------------
|
|
||||||
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
|
|
||||||
double[] log10AlleleFrequencyPriors,
|
|
||||||
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
|
|
||||||
int numSamples = GLs.size();
|
|
||||||
int numChr = 2*numSamples;
|
|
||||||
|
|
||||||
double[][] logYMatrix = new double[1+numSamples][1+numChr];
|
|
||||||
|
|
||||||
for (int i=0; i <=numSamples; i++)
|
|
||||||
for (int j=0; j <=numChr; j++)
|
|
||||||
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
//YMatrix[0][0] = 1.0;
|
|
||||||
logYMatrix[0][0] = 0.0;
|
|
||||||
int j=0;
|
|
||||||
|
|
||||||
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
|
||||||
j++;
|
|
||||||
|
|
||||||
if ( !sample.getValue().hasLikelihoods() )
|
if ( !sample.getValue().hasLikelihoods() )
|
||||||
continue;
|
continue;
|
||||||
|
Genotype g = GLs.get(sample.getKey());
|
||||||
|
|
||||||
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
|
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
|
||||||
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
|
|
||||||
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
|
|
||||||
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
|
|
||||||
|
|
||||||
// special treatment for k=0: iteration reduces to:
|
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
|
||||||
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
continue; // regular likelihoods
|
||||||
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
|
|
||||||
|
|
||||||
for (int k=1; k <= 2*j; k++ ) {
|
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
|
||||||
|
|
||||||
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
|
|
||||||
double logNumerator[];
|
|
||||||
logNumerator = new double[3];
|
|
||||||
if (k < 2*j-1)
|
|
||||||
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
|
|
||||||
genotypeLikelihoods[idxAA];
|
|
||||||
else
|
|
||||||
logNumerator[0] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
|
|
||||||
if (k < 2*j)
|
|
||||||
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
|
|
||||||
genotypeLikelihoods[idxAB];
|
|
||||||
else
|
|
||||||
logNumerator[1] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
if (k > 1)
|
|
||||||
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
|
|
||||||
genotypeLikelihoods[idxBB];
|
|
||||||
else
|
|
||||||
logNumerator[2] = Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
double logNum = MathUtils.softMax(logNumerator);
|
|
||||||
|
|
||||||
//YMatrix[j][k] = num/den;
|
|
||||||
logYMatrix[j][k] = logNum - logDenominator;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
double qual = Genotype.NO_NEG_LOG_10PERROR;
|
||||||
|
myAlleles.add(Allele.NO_CALL);
|
||||||
|
myAlleles.add(Allele.NO_CALL);
|
||||||
|
//System.out.println(myAlleles.toString());
|
||||||
|
calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false));
|
||||||
}
|
}
|
||||||
|
return calls;
|
||||||
for (int k=0; k <= numChr; k++)
|
|
||||||
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
|
|
||||||
|
|
||||||
return numChr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
|
||||||
|
|
@ -657,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
||||||
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -32,10 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
|
||||||
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
|
||||||
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
|
@ -70,9 +71,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
// gdebug removeme
|
// gdebug removeme
|
||||||
// todo -cleanup
|
// todo -cleanup
|
||||||
private HaplotypeIndelErrorModel model;
|
|
||||||
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
|
|
||||||
//
|
|
||||||
private GenomeLoc lastSiteVisited;
|
private GenomeLoc lastSiteVisited;
|
||||||
private ArrayList<Allele> alleleList;
|
private ArrayList<Allele> alleleList;
|
||||||
|
|
||||||
|
|
@ -83,26 +81,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
|
|
||||||
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
|
||||||
super(UAC, logger);
|
super(UAC, logger);
|
||||||
if (UAC.GSA_PRODUCTION_ONLY == false) {
|
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,UAC.OUTPUT_DEBUG_INDEL_INFO);
|
||||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
|
||||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
|
|
||||||
useOldWrongHorribleHackedUpLikelihoodModel = false;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
useOldWrongHorribleHackedUpLikelihoodModel = true;
|
|
||||||
double INSERTION_START_PROBABILITY = 1e-3;
|
|
||||||
|
|
||||||
double INSERTION_END_PROBABILITY = 0.5;
|
|
||||||
|
|
||||||
double ALPHA_DELETION_PROBABILITY = 1e-3;
|
|
||||||
|
|
||||||
|
|
||||||
model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
|
|
||||||
INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
|
|
||||||
}
|
|
||||||
|
|
||||||
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
|
|
||||||
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
|
|
||||||
alleleList = new ArrayList<Allele>();
|
alleleList = new ArrayList<Allele>();
|
||||||
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||||
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
|
||||||
|
|
@ -321,7 +300,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
haplotypeMap.clear();
|
haplotypeMap.clear();
|
||||||
|
|
||||||
if (getAlleleListFromVCF) {
|
if (getAlleleListFromVCF) {
|
||||||
for( final VariantContext vc_input : tracker.getValues(UAC.alleles) ) {
|
for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
|
||||||
if( vc_input != null &&
|
if( vc_input != null &&
|
||||||
allowableTypes.contains(vc_input.getType()) &&
|
allowableTypes.contains(vc_input.getType()) &&
|
||||||
ref.getLocus().getStart() == vc_input.getStart()) {
|
ref.getLocus().getStart() == vc_input.getStart()) {
|
||||||
|
|
@ -382,20 +361,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
|
||||||
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
|
||||||
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
|
||||||
|
|
||||||
if (useOldWrongHorribleHackedUpLikelihoodModel) {
|
final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
|
||||||
numPrefBases = 20;
|
final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
|
||||||
hsize=80;
|
final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
|
||||||
}
|
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
|
||||||
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
|
||||||
//System.out.println(eventLength);
|
//System.out.println(eventLength);
|
||||||
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
|
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
|
||||||
ref, hsize, numPrefBases);
|
ref, hsize, numPrefBases);
|
||||||
|
|
||||||
// For each sample, get genotype likelihoods based on pileup
|
// For each sample, get genotype likelihoods based on pileup
|
||||||
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
|
||||||
|
|
@ -412,17 +388,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
pileup = context.getBasePileup();
|
pileup = context.getBasePileup();
|
||||||
|
|
||||||
if (pileup != null ) {
|
if (pileup != null ) {
|
||||||
double[] genotypeLikelihoods;
|
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
||||||
if (useOldWrongHorribleHackedUpLikelihoodModel)
|
|
||||||
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
|
|
||||||
else
|
|
||||||
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
|
|
||||||
|
|
||||||
|
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
|
|
||||||
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is ordered as for example
|
|
||||||
// for 3 alleles it's 00 01 11 02 12 22
|
|
||||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
|
||||||
alleleList,
|
alleleList,
|
||||||
genotypeLikelihoods,
|
genotypeLikelihoods,
|
||||||
getFilteredDepth(pileup)));
|
getFilteredDepth(pileup)));
|
||||||
|
|
@ -444,4 +412,16 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
|
||||||
return indelLikelihoodMap.get();
|
return indelLikelihoodMap.get();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
|
||||||
|
// so that per-sample DP will include deletions covering the event.
|
||||||
|
protected int getFilteredDepth(ReadBackedPileup pileup) {
|
||||||
|
int count = 0;
|
||||||
|
for ( PileupElement p : pileup ) {
|
||||||
|
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()) )
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -26,16 +26,14 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||||
|
|
@ -58,25 +56,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
|
||||||
if ( tracker == null || ref == null || logger == null )
|
|
||||||
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
|
|
||||||
VariantContext vc = null;
|
|
||||||
|
|
||||||
// search for usable record
|
|
||||||
for( final VariantContext vc_input : tracker.getValues(allelesBinding) ) {
|
|
||||||
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
|
||||||
if ( vc == null ) {
|
|
||||||
vc = vc_input;
|
|
||||||
} else {
|
|
||||||
logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return vc;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
public Allele getLikelihoods(RefMetaDataTracker tracker,
|
||||||
ReferenceContext ref,
|
ReferenceContext ref,
|
||||||
Map<String, AlignmentContext> contexts,
|
Map<String, AlignmentContext> contexts,
|
||||||
|
|
@ -96,7 +75,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
if ( alternateAlleleToUse != null ) {
|
if ( alternateAlleleToUse != null ) {
|
||||||
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
|
||||||
} else if ( useAlleleFromVCF ) {
|
} else if ( useAlleleFromVCF ) {
|
||||||
VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger, UAC.alleles);
|
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
|
||||||
|
|
||||||
// ignore places where we don't have a variant
|
// ignore places where we don't have a variant
|
||||||
if ( vc == null )
|
if ( vc == null )
|
||||||
|
|
@ -143,8 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
|
||||||
aList.add(refAllele);
|
aList.add(refAllele);
|
||||||
aList.add(altAllele);
|
aList.add(altAllele);
|
||||||
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
|
||||||
|
|
||||||
|
// normalize in log space so that max element is zero.
|
||||||
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
|
||||||
aList, dlike, getFilteredDepth(pileup)));
|
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
|
||||||
}
|
}
|
||||||
|
|
||||||
return refAllele;
|
return refAllele;
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.commandline.RodBinding;
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
|
|
|
||||||
|
|
@ -143,35 +143,21 @@ public class UnifiedArgumentCollection {
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
|
||||||
public int INDEL_HAPLOTYPE_SIZE = 80;
|
public int INDEL_HAPLOTYPE_SIZE = 80;
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
|
|
||||||
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
|
|
||||||
//gdebug+
|
//gdebug+
|
||||||
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
|
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
|
||||||
@Hidden
|
// @Hidden
|
||||||
@Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
|
// @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
|
||||||
public boolean GET_GAP_PENALTIES_FROM_DATA = false;
|
// public boolean GET_GAP_PENALTIES_FROM_DATA = false;
|
||||||
|
//
|
||||||
@Hidden
|
// @Hidden
|
||||||
@Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
|
// @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
|
||||||
public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
|
// public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
|
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
|
||||||
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
|
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false)
|
|
||||||
public boolean dovit = false;
|
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
|
|
||||||
public boolean GSA_PRODUCTION_ONLY = false;
|
|
||||||
|
|
||||||
@Hidden
|
|
||||||
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
|
|
||||||
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
|
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
|
||||||
public boolean IGNORE_SNP_ALLELES = false;
|
public boolean IGNORE_SNP_ALLELES = false;
|
||||||
|
|
@ -191,7 +177,6 @@ public class UnifiedArgumentCollection {
|
||||||
|
|
||||||
uac.GLmodel = GLmodel;
|
uac.GLmodel = GLmodel;
|
||||||
uac.AFmodel = AFmodel;
|
uac.AFmodel = AFmodel;
|
||||||
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
|
|
||||||
uac.heterozygosity = heterozygosity;
|
uac.heterozygosity = heterozygosity;
|
||||||
uac.PCR_error = PCR_error;
|
uac.PCR_error = PCR_error;
|
||||||
uac.GenotypingMode = GenotypingMode;
|
uac.GenotypingMode = GenotypingMode;
|
||||||
|
|
@ -209,15 +194,10 @@ public class UnifiedArgumentCollection {
|
||||||
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
|
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
|
||||||
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
|
||||||
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
|
||||||
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
|
|
||||||
uac.alleles = alleles;
|
uac.alleles = alleles;
|
||||||
|
|
||||||
uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
|
|
||||||
uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
|
|
||||||
// todo- arguments to remove
|
// todo- arguments to remove
|
||||||
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
|
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
|
||||||
uac.dovit = dovit;
|
|
||||||
uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
|
|
||||||
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
|
||||||
|
|
||||||
return uac;
|
return uac;
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.utils.SampleUtils;
|
import org.broadinstitute.sting.utils.SampleUtils;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
|
@ -127,7 +126,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
@ArgumentCollection
|
@ArgumentCollection
|
||||||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
|
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
|
||||||
|
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
|
||||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
||||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||||
|
|
||||||
|
|
@ -210,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
||||||
if ( verboseWriter != null )
|
if ( verboseWriter != null )
|
||||||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
||||||
|
|
||||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
|
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
|
||||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
|
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
|
||||||
|
|
||||||
// initialize the header
|
// initialize the header
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
|
|
@ -36,13 +37,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
||||||
import org.broadinstitute.sting.utils.*;
|
import org.broadinstitute.sting.utils.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -236,10 +235,11 @@ public class UnifiedGenotyperEngine {
|
||||||
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
|
||||||
VariantContext vc;
|
VariantContext vc;
|
||||||
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
|
||||||
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
|
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||||
if ( vcInput == null )
|
if ( vcInput == null )
|
||||||
return null;
|
return null;
|
||||||
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
|
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase());
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// deal with bad/non-standard reference bases
|
// deal with bad/non-standard reference bases
|
||||||
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
|
||||||
|
|
@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine {
|
||||||
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private final static double[] binomialProbabilityDepthCache = new double[10000];
|
||||||
|
static {
|
||||||
|
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
|
||||||
|
binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final double getRefBinomialProb(final int depth) {
|
||||||
|
if ( depth < binomialProbabilityDepthCache.length )
|
||||||
|
return binomialProbabilityDepthCache[depth];
|
||||||
|
else
|
||||||
|
return MathUtils.binomialProbability(0, depth, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
|
||||||
if ( contexts == null )
|
if ( contexts == null )
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine {
|
||||||
depth = context.getExtendedEventPileup().size();
|
depth = context.getExtendedEventPileup().size();
|
||||||
}
|
}
|
||||||
|
|
||||||
P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
|
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
|
||||||
|
|
@ -635,7 +650,7 @@ public class UnifiedGenotyperEngine {
|
||||||
// no extended event pileup
|
// no extended event pileup
|
||||||
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
|
||||||
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
|
||||||
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger, UAC.alleles);
|
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
|
||||||
if (vcInput == null)
|
if (vcInput == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -741,4 +756,23 @@ public class UnifiedGenotyperEngine {
|
||||||
|
|
||||||
return afcm;
|
return afcm;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
|
||||||
|
if ( tracker == null || ref == null || logger == null )
|
||||||
|
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
|
||||||
|
VariantContext vc = null;
|
||||||
|
|
||||||
|
// search for usable record
|
||||||
|
for( final VariantContext vc_input : tracker.getValues(allelesBinding, loc) ) {
|
||||||
|
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
|
||||||
|
if ( vc == null ) {
|
||||||
|
vc = vc_input;
|
||||||
|
} else {
|
||||||
|
logger.warn("Multiple valid VCF records detected in the alleles input file at site " + ref.getLocus() + ", only considering the first record");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return vc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,9 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.indels;
|
package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
|
|
@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
|
||||||
baseMatchArray = new double[MAX_CACHED_QUAL+1];
|
baseMatchArray = new double[MAX_CACHED_QUAL+1];
|
||||||
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
|
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
|
||||||
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
|
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
|
||||||
double baseProb = QualityUtils.qualToProb(k);
|
double baseProb = QualityUtils.qualToProb((byte)k);
|
||||||
|
|
||||||
|
|
||||||
baseMatchArray[k] = probToQual(baseProb);
|
baseMatchArray[k] = probToQual(baseProb);
|
||||||
|
|
|
||||||
|
|
@ -28,9 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.indels;
|
||||||
import net.sf.samtools.Cigar;
|
import net.sf.samtools.Cigar;
|
||||||
import net.sf.samtools.CigarElement;
|
import net.sf.samtools.CigarElement;
|
||||||
import net.sf.samtools.CigarOperator;
|
import net.sf.samtools.CigarOperator;
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.utils.Haplotype;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.genotype.Haplotype;
|
|
||||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||||
|
|
@ -50,36 +51,8 @@ import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Reca
|
||||||
|
|
||||||
|
|
||||||
public class PairHMMIndelErrorModel {
|
public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
|
|
||||||
public static final int BASE_QUAL_THRESHOLD = 20;
|
public static final int BASE_QUAL_THRESHOLD = 20;
|
||||||
|
|
||||||
|
|
||||||
private static final int MATCH_OFFSET = 0;
|
|
||||||
private static final int X_OFFSET = 1;
|
|
||||||
private static final int Y_OFFSET = 2;
|
|
||||||
|
|
||||||
private static final int DIAG = 0;
|
|
||||||
private static final int UP = 1;
|
|
||||||
private static final int LEFT = 2;
|
|
||||||
|
|
||||||
private static final int DIAG_GOTO_M = 0;
|
|
||||||
private static final int DIAG_GOTO_X = 1;
|
|
||||||
private static final int DIAG_GOTO_Y = 2;
|
|
||||||
|
|
||||||
private static final int UP_GOTO_M = 4;
|
|
||||||
private static final int UP_GOTO_X = 5;
|
|
||||||
private static final int UP_GOTO_Y = 6;
|
|
||||||
|
|
||||||
private static final int LEFT_GOTO_M = 8;
|
|
||||||
private static final int LEFT_GOTO_X = 9;
|
|
||||||
private static final int LEFT_GOTO_Y = 10;
|
|
||||||
|
|
||||||
private static final int[] ACTIONS_M = {DIAG_GOTO_M, DIAG_GOTO_X, DIAG_GOTO_Y};
|
|
||||||
private static final int[] ACTIONS_X = {UP_GOTO_M, UP_GOTO_X, UP_GOTO_Y};
|
|
||||||
private static final int[] ACTIONS_Y = {LEFT_GOTO_M, LEFT_GOTO_X, LEFT_GOTO_Y};
|
|
||||||
|
|
||||||
|
|
||||||
private final double logGapOpenProbability;
|
private final double logGapOpenProbability;
|
||||||
private final double logGapContinuationProbability;
|
private final double logGapContinuationProbability;
|
||||||
|
|
||||||
|
|
@ -100,36 +73,13 @@ public class PairHMMIndelErrorModel {
|
||||||
private static final double MIN_GAP_CONT_PENALTY = 10.0;
|
private static final double MIN_GAP_CONT_PENALTY = 10.0;
|
||||||
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
|
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
|
||||||
|
|
||||||
|
|
||||||
private boolean doViterbi = false;
|
|
||||||
|
|
||||||
private final boolean useAffineGapModel = true;
|
|
||||||
private boolean doContextDependentPenalties = false;
|
|
||||||
|
|
||||||
private final double[] GAP_OPEN_PROB_TABLE;
|
private final double[] GAP_OPEN_PROB_TABLE;
|
||||||
private final double[] GAP_CONT_PROB_TABLE;
|
private final double[] GAP_CONT_PROB_TABLE;
|
||||||
|
|
||||||
private boolean getGapPenaltiesFromFile = false;
|
|
||||||
|
|
||||||
private int SMOOTHING = 1;
|
|
||||||
private int MAX_QUALITY_SCORE = 50;
|
|
||||||
private int PRESERVE_QSCORES_LESS_THAN = 5;
|
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Private Member Variables
|
// Private Member Variables
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
//copy+
|
|
||||||
/* private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
|
|
||||||
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
|
|
||||||
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
|
|
||||||
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
|
|
||||||
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
|
|
||||||
protected static final String EOF_MARKER = "EOF";
|
|
||||||
private long numReadsWithMalformedColorSpace = 0;
|
|
||||||
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
|
|
||||||
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
|
|
||||||
*/
|
|
||||||
//copy-
|
|
||||||
static {
|
static {
|
||||||
LOG_ONE_HALF= -Math.log10(2.0);
|
LOG_ONE_HALF= -Math.log10(2.0);
|
||||||
END_GAP_COST = LOG_ONE_HALF;
|
END_GAP_COST = LOG_ONE_HALF;
|
||||||
|
|
@ -145,141 +95,9 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit,boolean gpf, File RECAL_FILE) {
|
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) {
|
||||||
|
|
||||||
this(indelGOP, indelGCP, deb, doCDP, dovit);
|
|
||||||
this.getGapPenaltiesFromFile = gpf;
|
|
||||||
|
|
||||||
// read data from recal file
|
|
||||||
// gdebug - start copy from TableRecalibrationWalker
|
|
||||||
/* if (gpf) {
|
|
||||||
boolean sawEOF = false;
|
|
||||||
boolean REQUIRE_EOF = false;
|
|
||||||
|
|
||||||
int lineNumber = 0;
|
|
||||||
boolean foundAllCovariates = false;
|
|
||||||
// Get a list of all available covariates
|
|
||||||
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
|
|
||||||
|
|
||||||
try {
|
|
||||||
for ( String line : new XReadLines(RECAL_FILE) ) {
|
|
||||||
lineNumber++;
|
|
||||||
if ( EOF_MARKER.equals(line) ) {
|
|
||||||
sawEOF = true;
|
|
||||||
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
|
|
||||||
; // Skip over the comment lines, (which start with '#')
|
|
||||||
}
|
|
||||||
// Read in the covariates that were used from the input file
|
|
||||||
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
|
|
||||||
if( foundAllCovariates ) {
|
|
||||||
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
|
|
||||||
} else { // Found the covariate list in input file, loop through all of them and instantiate them
|
|
||||||
String[] vals = line.split(",");
|
|
||||||
for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
|
|
||||||
boolean foundClass = false;
|
|
||||||
for( Class<?> covClass : classes ) {
|
|
||||||
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
|
|
||||||
foundClass = true;
|
|
||||||
try {
|
|
||||||
Covariate covariate = (Covariate)covClass.newInstance();
|
|
||||||
requestedCovariates.add( covariate );
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new DynamicClassResolutionException(covClass, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if( !foundClass ) {
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else { // Found a line of data
|
|
||||||
if( !foundAllCovariates ) {
|
|
||||||
foundAllCovariates = true;
|
|
||||||
|
|
||||||
// At this point all the covariates should have been found and initialized
|
|
||||||
if( requestedCovariates.size() < 2 ) {
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
|
|
||||||
}
|
|
||||||
|
|
||||||
final boolean createCollapsedTables = true;
|
|
||||||
|
|
||||||
// Initialize any covariate member variables using the shared argument collection
|
|
||||||
for( Covariate cov : requestedCovariates ) {
|
|
||||||
cov.initialize( RAC );
|
|
||||||
}
|
|
||||||
// Initialize the data hashMaps
|
|
||||||
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
|
|
||||||
|
|
||||||
}
|
|
||||||
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} catch ( FileNotFoundException e ) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
|
|
||||||
} catch ( NumberFormatException e ) {
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( !sawEOF ) {
|
|
||||||
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
|
|
||||||
if ( REQUIRE_EOF )
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
|
|
||||||
}
|
|
||||||
|
|
||||||
if( dataManager == null ) {
|
|
||||||
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the tables of empirical quality scores that will be used in the sequential calculation
|
|
||||||
dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
|
|
||||||
}
|
|
||||||
// debug end copy
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
private void addCSVData(final File file, final String line) {
|
|
||||||
final String[] vals = line.split(",");
|
|
||||||
|
|
||||||
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
|
|
||||||
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
|
|
||||||
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
|
|
||||||
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
|
|
||||||
}
|
|
||||||
|
|
||||||
final Object[] key = new Object[requestedCovariates.size()];
|
|
||||||
Covariate cov;
|
|
||||||
int iii;
|
|
||||||
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
|
|
||||||
cov = requestedCovariates.get( iii );
|
|
||||||
key[iii] = cov.getValue( vals[iii] );
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create a new datum using the number of observations, number of mismatches, and reported quality score
|
|
||||||
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
|
|
||||||
// Add that datum to all the collapsed tables which will be used in the sequential calculation
|
|
||||||
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
|
||||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
|
|
||||||
this(indelGOP, indelGCP, deb, doCDP);
|
|
||||||
this.doViterbi = dovit;
|
|
||||||
}
|
|
||||||
|
|
||||||
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
|
|
||||||
|
|
||||||
|
|
||||||
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
|
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
|
||||||
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
|
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
|
||||||
this.doContextDependentPenalties = doCDP;
|
|
||||||
this.DEBUG = deb;
|
this.DEBUG = deb;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -313,132 +131,6 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private double computeReadLikelihoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) {
|
|
||||||
final int X_METRIC_LENGTH = readBases.length+1;
|
|
||||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
|
||||||
|
|
||||||
// initialize path metric and traceback memories for likelihood computation
|
|
||||||
double[][] pathMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
int[][] bestMetricArray = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
|
|
||||||
pathMetricArray[0][0]= 0;//Double.NEGATIVE_INFINITY;
|
|
||||||
|
|
||||||
for (int i=1; i < X_METRIC_LENGTH; i++) {
|
|
||||||
pathMetricArray[i][0] = 0;
|
|
||||||
bestMetricArray[i][0] = UP;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int j=1; j < Y_METRIC_LENGTH; j++) {
|
|
||||||
pathMetricArray[0][j] = 0;//logGapOpenProbability + (j-1) * logGapContinuationProbability;
|
|
||||||
bestMetricArray[0][j] = LEFT;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
|
|
||||||
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
|
|
||||||
|
|
||||||
byte x = readBases[indI-1];
|
|
||||||
byte y = haplotypeBases[indJ-1];
|
|
||||||
byte qual = readQuals[indI-1];
|
|
||||||
|
|
||||||
double bestMetric = 0.0;
|
|
||||||
int bestMetricIdx = 0;
|
|
||||||
|
|
||||||
// compute metric for match/mismatch
|
|
||||||
// workaround for reads whose bases quality = 0,
|
|
||||||
if (qual < 1)
|
|
||||||
qual = 1;
|
|
||||||
|
|
||||||
if (qual > MAX_CACHED_QUAL)
|
|
||||||
qual = MAX_CACHED_QUAL;
|
|
||||||
|
|
||||||
double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
|
||||||
double[] metrics = new double[3];
|
|
||||||
|
|
||||||
metrics[DIAG] = pathMetricArray[indI-1][indJ-1] + pBaseRead;
|
|
||||||
metrics[UP] = pathMetricArray[indI-1][indJ] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
|
|
||||||
metrics[LEFT] = pathMetricArray[indI][indJ-1] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
|
|
||||||
|
|
||||||
if (doViterbi) {
|
|
||||||
bestMetricIdx = MathUtils.maxElementIndex(metrics);
|
|
||||||
bestMetric = metrics[bestMetricIdx];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bestMetric = MathUtils.softMax(metrics);
|
|
||||||
|
|
||||||
pathMetricArray[indI][indJ] = bestMetric;
|
|
||||||
bestMetricArray[indI][indJ] = bestMetricIdx;
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
double bestMetric=0.0;
|
|
||||||
int bestMetricIdx=0,bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
|
|
||||||
|
|
||||||
for (int i=0; i < X_METRIC_LENGTH; i ++ ) {
|
|
||||||
int j= Y_METRIC_LENGTH-1;
|
|
||||||
|
|
||||||
if (pathMetricArray[i][j] > bestMetric) {
|
|
||||||
bestMetric = pathMetricArray[i][j];
|
|
||||||
bestI = i;
|
|
||||||
bestJ = j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (int j=0; j < Y_METRIC_LENGTH; j++ ) {
|
|
||||||
int i= X_METRIC_LENGTH-1;
|
|
||||||
if (pathMetricArray[i][j] >= bestMetric) {
|
|
||||||
bestMetric = pathMetricArray[i][j];
|
|
||||||
bestI = i;
|
|
||||||
bestJ = j;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DEBUG && doViterbi) {
|
|
||||||
|
|
||||||
String haplotypeString = new String (haplotypeBases);
|
|
||||||
String readString = new String(readBases);
|
|
||||||
|
|
||||||
|
|
||||||
int i = bestI;
|
|
||||||
int j = bestJ;
|
|
||||||
|
|
||||||
|
|
||||||
System.out.println("Simple NW");
|
|
||||||
|
|
||||||
while (i >0 || j >0) {
|
|
||||||
bestMetricIdx = bestMetricArray[i][j];
|
|
||||||
System.out.print(bestMetricIdx);
|
|
||||||
if (bestMetricIdx == UP) {
|
|
||||||
// insert gap in Y
|
|
||||||
haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
|
|
||||||
i--;
|
|
||||||
} else if (bestMetricIdx == LEFT) {
|
|
||||||
readString = readString.substring(0,i)+"-"+readString.substring(i);
|
|
||||||
j--;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
i--; j--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
System.out.println("\nAlignment: ");
|
|
||||||
System.out.println("R:"+readString);
|
|
||||||
System.out.println("H:"+haplotypeString);
|
|
||||||
System.out.println();
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
if (DEBUG)
|
|
||||||
System.out.format("Likelihood: %5.4f\n", bestMetric);
|
|
||||||
|
|
||||||
return bestMetric;
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
|
static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
|
||||||
// compute forward hrun length, example:
|
// compute forward hrun length, example:
|
||||||
// AGGTGACCCCCCTGAGAG
|
// AGGTGACCCCCCTGAGAG
|
||||||
|
|
@ -479,14 +171,10 @@ public class PairHMMIndelErrorModel {
|
||||||
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
|
||||||
|
|
||||||
// initialize path metric and traceback memories for likelihood computation
|
// initialize path metric and traceback memories for likelihood computation
|
||||||
double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
||||||
int[][] bestActionArrayM = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
int[][] bestActionArrayX = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
int[][] bestActionArrayY = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
|
|
||||||
|
|
||||||
double c,d;
|
|
||||||
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
|
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
|
||||||
|
|
||||||
for (int i=1; i < X_METRIC_LENGTH; i++) {
|
for (int i=1; i < X_METRIC_LENGTH; i++) {
|
||||||
|
|
@ -494,8 +182,6 @@ public class PairHMMIndelErrorModel {
|
||||||
matchMetricArray[i][0] = Double.NEGATIVE_INFINITY;
|
matchMetricArray[i][0] = Double.NEGATIVE_INFINITY;
|
||||||
YMetricArray[i][0] = Double.NEGATIVE_INFINITY;
|
YMetricArray[i][0] = Double.NEGATIVE_INFINITY;
|
||||||
XMetricArray[i][0] = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
|
XMetricArray[i][0] = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
|
||||||
|
|
||||||
bestActionArrayX[i][0] = bestActionArrayY[i][0] = bestActionArrayM[i][0] = UP_GOTO_X;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int j=1; j < Y_METRIC_LENGTH; j++) {
|
for (int j=1; j < Y_METRIC_LENGTH; j++) {
|
||||||
|
|
@ -503,188 +189,46 @@ public class PairHMMIndelErrorModel {
|
||||||
matchMetricArray[0][j] = Double.NEGATIVE_INFINITY;
|
matchMetricArray[0][j] = Double.NEGATIVE_INFINITY;
|
||||||
XMetricArray[0][j] = Double.NEGATIVE_INFINITY;
|
XMetricArray[0][j] = Double.NEGATIVE_INFINITY;
|
||||||
YMetricArray[0][j] = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
|
YMetricArray[0][j] = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
|
||||||
|
|
||||||
bestActionArrayY[0][j] = bestActionArrayM[0][j] = bestActionArrayX[0][j] = LEFT_GOTO_Y;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
|
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
|
||||||
int im1 = indI-1;
|
final int im1 = indI-1;
|
||||||
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
|
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
|
||||||
int jm1 = indJ-1;
|
final int jm1 = indJ-1;
|
||||||
byte x = readBases[im1];
|
final byte x = readBases[im1];
|
||||||
byte y = haplotypeBases[jm1];
|
final byte y = haplotypeBases[jm1];
|
||||||
byte qual = readQuals[im1];
|
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
|
||||||
|
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
||||||
double bestMetric = 0.0;
|
|
||||||
int bestMetricIdx = 0;
|
|
||||||
|
|
||||||
// compute metric for match/mismatch
|
|
||||||
// workaround for reads whose bases quality = 0,
|
|
||||||
if (qual < 1)
|
|
||||||
qual = 1;
|
|
||||||
|
|
||||||
if (qual > MAX_CACHED_QUAL)
|
|
||||||
qual = MAX_CACHED_QUAL;
|
|
||||||
|
|
||||||
double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
|
|
||||||
|
|
||||||
|
|
||||||
double[] metrics = new double[3];
|
|
||||||
|
|
||||||
|
|
||||||
if (doViterbi) {
|
|
||||||
// update match array
|
|
||||||
metrics[MATCH_OFFSET] = matchMetricArray[im1][jm1] + pBaseRead;
|
|
||||||
metrics[X_OFFSET] = XMetricArray[im1][jm1] + pBaseRead;
|
|
||||||
metrics[Y_OFFSET] = YMetricArray[im1][jm1] + pBaseRead;
|
|
||||||
|
|
||||||
bestMetricIdx = MathUtils.maxElementIndex(metrics);
|
|
||||||
bestMetric = metrics[bestMetricIdx];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
|
|
||||||
YMetricArray[im1][jm1] + pBaseRead);
|
|
||||||
|
|
||||||
|
double bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead,
|
||||||
|
XMetricArray[im1][jm1] + pBaseRead,
|
||||||
|
YMetricArray[im1][jm1] + pBaseRead);
|
||||||
matchMetricArray[indI][indJ] = bestMetric;
|
matchMetricArray[indI][indJ] = bestMetric;
|
||||||
bestActionArrayM[indI][indJ] = ACTIONS_M[bestMetricIdx];
|
|
||||||
|
|
||||||
// update X array
|
// update X array
|
||||||
// State X(i,j): X(1:i) aligned to a gap in Y(1:j).
|
// State X(i,j): X(1:i) aligned to a gap in Y(1:j).
|
||||||
// When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
|
// When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
|
||||||
|
|
||||||
//c = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
|
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||||
//d = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
|
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||||
if (getGapPenaltiesFromFile) {
|
bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
|
||||||
c = currentGOP[im1];
|
|
||||||
d = logGapContinuationProbability;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
c = currentGOP[jm1];
|
|
||||||
d = currentGCP[jm1];
|
|
||||||
}
|
|
||||||
if (indJ == Y_METRIC_LENGTH-1)
|
|
||||||
c = d = END_GAP_COST;
|
|
||||||
|
|
||||||
if (doViterbi) {
|
|
||||||
metrics[MATCH_OFFSET] = matchMetricArray[im1][indJ] + c;
|
|
||||||
metrics[X_OFFSET] = XMetricArray[im1][indJ] + d;
|
|
||||||
metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability;
|
|
||||||
|
|
||||||
bestMetricIdx = MathUtils.maxElementIndex(metrics);
|
|
||||||
bestMetric = metrics[bestMetricIdx];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c, XMetricArray[im1][indJ] + d);
|
|
||||||
|
|
||||||
XMetricArray[indI][indJ] = bestMetric;
|
XMetricArray[indI][indJ] = bestMetric;
|
||||||
bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx];
|
|
||||||
|
|
||||||
// update Y array
|
// update Y array
|
||||||
//c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
|
//c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
|
||||||
//d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
|
//d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
|
||||||
if (getGapPenaltiesFromFile) {
|
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
|
||||||
c = currentGOP[im1];
|
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
|
||||||
d = logGapContinuationProbability;
|
bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
|
||||||
}
|
|
||||||
else {
|
|
||||||
c = currentGOP[jm1];
|
|
||||||
d = currentGCP[jm1];
|
|
||||||
}
|
|
||||||
if (indI == X_METRIC_LENGTH-1)
|
|
||||||
c = d = END_GAP_COST;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if (doViterbi) {
|
|
||||||
metrics[MATCH_OFFSET] = matchMetricArray[indI][jm1] + c;
|
|
||||||
metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability;
|
|
||||||
metrics[Y_OFFSET] = YMetricArray[indI][jm1] + d;
|
|
||||||
|
|
||||||
bestMetricIdx = MathUtils.maxElementIndex(metrics);
|
|
||||||
bestMetric = metrics[bestMetricIdx];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c, YMetricArray[indI][jm1] + d);
|
|
||||||
|
|
||||||
YMetricArray[indI][indJ] = bestMetric;
|
YMetricArray[indI][indJ] = bestMetric;
|
||||||
bestActionArrayY[indI][indJ] = ACTIONS_Y[bestMetricIdx];
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
double bestMetric;
|
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
|
||||||
double metrics[] = new double[3];
|
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
|
||||||
int bestTable=0, bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
|
XMetricArray[bestI][bestJ],
|
||||||
metrics[MATCH_OFFSET] = matchMetricArray[bestI][bestJ];
|
YMetricArray[bestI][bestJ]);
|
||||||
metrics[X_OFFSET] = XMetricArray[bestI][bestJ];
|
|
||||||
metrics[Y_OFFSET] = YMetricArray[bestI][bestJ];
|
|
||||||
if (doViterbi) {
|
|
||||||
bestTable = MathUtils.maxElementIndex(metrics);
|
|
||||||
bestMetric = metrics[bestTable];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
bestMetric = MathUtils.softMax(metrics);
|
|
||||||
|
|
||||||
// Do traceback (needed only for debugging!)
|
|
||||||
if (DEBUG && doViterbi) {
|
|
||||||
|
|
||||||
int bestAction;
|
|
||||||
int i = bestI;
|
|
||||||
int j = bestJ;
|
|
||||||
|
|
||||||
|
|
||||||
System.out.println("Affine gap NW");
|
|
||||||
|
|
||||||
|
|
||||||
String haplotypeString = new String (haplotypeBases);
|
|
||||||
String readString = new String(readBases);
|
|
||||||
|
|
||||||
|
|
||||||
while (i >0 || j >0) {
|
|
||||||
if (bestTable == X_OFFSET) {
|
|
||||||
// insert gap in Y
|
|
||||||
haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
|
|
||||||
bestAction = bestActionArrayX[i][j];
|
|
||||||
}
|
|
||||||
else if (bestTable == Y_OFFSET) {
|
|
||||||
readString = readString.substring(0,i)+"-"+readString.substring(i);
|
|
||||||
bestAction = bestActionArrayY[i][j];
|
|
||||||
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
bestAction = bestActionArrayM[i][j];
|
|
||||||
}
|
|
||||||
System.out.print(bestAction);
|
|
||||||
|
|
||||||
|
|
||||||
// bestAction contains action to take at next step
|
|
||||||
// encoding of bestAction: upper 2 bits = direction, lower 2 bits = next table
|
|
||||||
|
|
||||||
// bestTable and nextDirection for next step
|
|
||||||
bestTable = bestAction & 0x3;
|
|
||||||
int nextDirection = bestAction >> 2;
|
|
||||||
if (nextDirection == UP) {
|
|
||||||
i--;
|
|
||||||
} else if (nextDirection == LEFT) {
|
|
||||||
j--;
|
|
||||||
} else { // if (nextDirection == DIAG)
|
|
||||||
i--; j--;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
System.out.println("\nAlignment: ");
|
|
||||||
System.out.println("R:"+readString);
|
|
||||||
System.out.println("H:"+haplotypeString);
|
|
||||||
System.out.println();
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
if (DEBUG)
|
if (DEBUG)
|
||||||
System.out.format("Likelihood: %5.4f\n", bestMetric);
|
System.out.format("Likelihood: %5.4f\n", bestMetric);
|
||||||
|
|
||||||
|
|
@ -707,12 +251,12 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
|
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
|
||||||
ReferenceContext ref, int eventLength,
|
ReferenceContext ref, int eventLength,
|
||||||
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
|
||||||
|
|
||||||
int numHaplotypes = haplotypeMap.size();
|
int numHaplotypes = haplotypeMap.size();
|
||||||
double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
|
||||||
double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes];
|
final int readCounts[] = new int[pileup.size()];
|
||||||
int readIdx=0;
|
int readIdx=0;
|
||||||
|
|
||||||
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
|
||||||
|
|
@ -723,34 +267,35 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.println(new String(ref.getBases()));
|
System.out.println(new String(ref.getBases()));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (doContextDependentPenalties && !getGapPenaltiesFromFile) {
|
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
|
||||||
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
|
// todo -- refactor into separate function
|
||||||
|
for (Allele a: haplotypeMap.keySet()) {
|
||||||
|
Haplotype haplotype = haplotypeMap.get(a);
|
||||||
for (Allele a: haplotypeMap.keySet()) {
|
byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
||||||
Haplotype haplotype = haplotypeMap.get(a);
|
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
|
||||||
byte[] haplotypeBases = haplotype.getBasesAsBytes();
|
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
|
||||||
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
|
|
||||||
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
|
|
||||||
|
|
||||||
// get homopolymer length profile for current haplotype
|
|
||||||
int[] hrunProfile = new int[haplotypeBases.length];
|
|
||||||
getContextHomopolymerLength(haplotypeBases,hrunProfile);
|
|
||||||
if (DEBUG) {
|
|
||||||
System.out.println("Haplotype bases:");
|
|
||||||
System.out.println(new String(haplotypeBases));
|
|
||||||
for (int i=0; i < hrunProfile.length; i++)
|
|
||||||
System.out.format("%d",hrunProfile[i]);
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
|
||||||
|
|
||||||
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
|
|
||||||
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
|
|
||||||
|
|
||||||
|
// get homopolymer length profile for current haplotype
|
||||||
|
int[] hrunProfile = new int[haplotypeBases.length];
|
||||||
|
getContextHomopolymerLength(haplotypeBases,hrunProfile);
|
||||||
|
if (DEBUG) {
|
||||||
|
System.out.println("Haplotype bases:");
|
||||||
|
System.out.println(new String(haplotypeBases));
|
||||||
|
for (int i=0; i < hrunProfile.length; i++)
|
||||||
|
System.out.format("%d",hrunProfile[i]);
|
||||||
|
System.out.println();
|
||||||
}
|
}
|
||||||
|
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
|
||||||
|
|
||||||
|
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
|
||||||
|
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for (PileupElement p: pileup) {
|
for (PileupElement p: pileup) {
|
||||||
|
// > 1 when the read is a consensus read representing multiple independent observations
|
||||||
|
final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
|
||||||
|
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
|
||||||
|
|
||||||
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
|
||||||
if (indelLikelihoodMap.containsKey(p)) {
|
if (indelLikelihoodMap.containsKey(p)) {
|
||||||
|
|
@ -762,61 +307,20 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
|
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
|
||||||
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
|
SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
|
||||||
if (read == null)
|
if (read == null)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) {
|
if ( isReduced ) {
|
||||||
|
read = ReadUtils.reducedReadWithReducedQuals(read);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(ReadUtils.is454Read(read)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
double[] recalQuals = null;
|
double[] recalQuals = null;
|
||||||
|
|
||||||
/*
|
|
||||||
if (getGapPenaltiesFromFile) {
|
|
||||||
RecalDataManager.parseSAMRecord( read, RAC );
|
|
||||||
|
|
||||||
|
|
||||||
recalQuals = new double[read.getReadLength()];
|
|
||||||
|
|
||||||
//compute all covariate values for this read
|
|
||||||
final Comparable[][] covariateValues_offset_x_covar =
|
|
||||||
RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
|
|
||||||
// For each base in the read
|
|
||||||
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
|
|
||||||
|
|
||||||
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
|
|
||||||
|
|
||||||
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
|
|
||||||
if(qualityScore == null)
|
|
||||||
{
|
|
||||||
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
|
|
||||||
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
|
|
||||||
}
|
|
||||||
|
|
||||||
recalQuals[offset] = -((double)qualityScore)/10.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
|
|
||||||
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
|
|
||||||
if (DEBUG) {
|
|
||||||
System.out.format("\n\nStarting read:%s S:%d US:%d E:%d UE:%d C:%s\n",read.getReadName(),
|
|
||||||
read.getAlignmentStart(),
|
|
||||||
read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
|
|
||||||
read.getCigarString());
|
|
||||||
|
|
||||||
byte[] bases = read.getReadBases();
|
|
||||||
for (int k = 0; k < recalQuals.length; k++) {
|
|
||||||
System.out.format("%c",bases[k]);
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
|
|
||||||
for (int k = 0; k < recalQuals.length; k++) {
|
|
||||||
System.out.format("%.0f ",recalQuals[k]);
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
} */
|
|
||||||
// get bases of candidate haplotypes that overlap with reads
|
// get bases of candidate haplotypes that overlap with reads
|
||||||
final int trailingBases = 3;
|
final int trailingBases = 3;
|
||||||
|
|
||||||
|
|
@ -937,11 +441,6 @@ public class PairHMMIndelErrorModel {
|
||||||
unclippedReadBases.length-numEndClippedBases);
|
unclippedReadBases.length-numEndClippedBases);
|
||||||
|
|
||||||
double[] recalCDP = null;
|
double[] recalCDP = null;
|
||||||
if (getGapPenaltiesFromFile) {
|
|
||||||
recalCDP = Arrays.copyOfRange(recalQuals,numStartClippedBases,
|
|
||||||
unclippedReadBases.length-numEndClippedBases);
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println("Read bases:");
|
System.out.println("Read bases:");
|
||||||
|
|
@ -971,27 +470,9 @@ public class PairHMMIndelErrorModel {
|
||||||
System.out.println(new String(haplotypeBases));
|
System.out.println(new String(haplotypeBases));
|
||||||
}
|
}
|
||||||
|
|
||||||
Double readLikelihood = 0.0;
|
final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
|
||||||
if (useAffineGapModel) {
|
final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
|
||||||
|
final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
|
||||||
double[] currentContextGOP = null;
|
|
||||||
double[] currentContextGCP = null;
|
|
||||||
|
|
||||||
if (doContextDependentPenalties) {
|
|
||||||
|
|
||||||
if (getGapPenaltiesFromFile) {
|
|
||||||
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
|
|
||||||
|
|
||||||
} else {
|
|
||||||
currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
|
|
||||||
currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
|
|
||||||
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
else
|
|
||||||
readLikelihood = computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
|
|
||||||
|
|
||||||
readEl.put(a,readLikelihood);
|
readEl.put(a,readLikelihood);
|
||||||
readLikelihoods[readIdx][j++] = readLikelihood;
|
readLikelihoods[readIdx][j++] = readLikelihood;
|
||||||
|
|
@ -1004,7 +485,7 @@ public class PairHMMIndelErrorModel {
|
||||||
|
|
||||||
if (DEBUG) {
|
if (DEBUG) {
|
||||||
System.out.println("\nLikelihood summary");
|
System.out.println("\nLikelihood summary");
|
||||||
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
|
for (readIdx=0; readIdx < pileup.size(); readIdx++) {
|
||||||
System.out.format("Read Index: %d ",readIdx);
|
System.out.format("Read Index: %d ",readIdx);
|
||||||
for (int i=0; i < readLikelihoods[readIdx].length; i++)
|
for (int i=0; i < readLikelihoods[readIdx].length; i++)
|
||||||
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
|
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
|
||||||
|
|
@ -1012,123 +493,41 @@ public class PairHMMIndelErrorModel {
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
|
||||||
|
}
|
||||||
|
|
||||||
|
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
|
||||||
|
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
|
||||||
|
|
||||||
|
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
|
||||||
for (int i=0; i < numHaplotypes; i++) {
|
for (int i=0; i < numHaplotypes; i++) {
|
||||||
for (int j=i; j < numHaplotypes; j++){
|
for (int j=i; j < numHaplotypes; j++){
|
||||||
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
|
||||||
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
|
||||||
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
|
||||||
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
|
for (int readIdx = 0; readIdx < readLikelihoods.length; readIdx++) {
|
||||||
|
|
||||||
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
|
||||||
// First term is approximated by Jacobian log with table lookup.
|
// First term is approximated by Jacobian log with table lookup.
|
||||||
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
|
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
|
||||||
continue;
|
continue;
|
||||||
haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
|
final double li = readLikelihoods[readIdx][i];
|
||||||
readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
|
final double lj = readLikelihoods[readIdx][j];
|
||||||
|
final int readCount = readCounts[readIdx];
|
||||||
|
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
|
final double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
|
|
||||||
int hSize = haplotypeLikehoodMatrix.length;
|
|
||||||
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
|
|
||||||
|
|
||||||
int k=0;
|
int k=0;
|
||||||
double maxElement = Double.NEGATIVE_INFINITY;
|
for (int j=0; j < numHaplotypes; j++) {
|
||||||
for (int j=0; j < hSize; j++) {
|
|
||||||
for (int i=0; i <= j; i++){
|
for (int i=0; i <= j; i++){
|
||||||
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
|
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
|
||||||
if (haplotypeLikehoodMatrix[i][j] > maxElement)
|
|
||||||
maxElement = haplotypeLikehoodMatrix[i][j];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// renormalize
|
// renormalize so that max element is zero.
|
||||||
for (int i=0; i < genotypeLikelihoods.length; i++)
|
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
|
||||||
genotypeLikelihoods[i] -= maxElement;
|
|
||||||
|
|
||||||
return genotypeLikelihoods;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Implements a serial recalibration of the reads using the combinational table.
|
|
||||||
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
|
|
||||||
*
|
|
||||||
* Given the full recalibration table, we perform the following preprocessing steps:
|
|
||||||
*
|
|
||||||
* - calculate the global quality score shift across all data [DeltaQ]
|
|
||||||
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
|
|
||||||
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
|
|
||||||
* - The final shift equation is:
|
|
||||||
*
|
|
||||||
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
|
|
||||||
* @param key The list of Comparables that were calculated from the covariates
|
|
||||||
* @return A recalibrated quality score as a byte
|
|
||||||
*/
|
|
||||||
/*
|
|
||||||
private byte performSequentialQualityCalculation( final Object... key ) {
|
|
||||||
|
|
||||||
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
|
|
||||||
final Object[] readGroupCollapsedKey = new Object[1];
|
|
||||||
final Object[] qualityScoreCollapsedKey = new Object[2];
|
|
||||||
final Object[] covariateCollapsedKey = new Object[3];
|
|
||||||
|
|
||||||
// The global quality shift (over the read group only)
|
|
||||||
readGroupCollapsedKey[0] = key[0];
|
|
||||||
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
|
|
||||||
double globalDeltaQ = 0.0;
|
|
||||||
if( globalRecalDatum != null ) {
|
|
||||||
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
|
|
||||||
final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
|
|
||||||
globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The shift in quality between reported and empirical
|
|
||||||
qualityScoreCollapsedKey[0] = key[0];
|
|
||||||
qualityScoreCollapsedKey[1] = key[1];
|
|
||||||
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
|
|
||||||
double deltaQReported = 0.0;
|
|
||||||
if( qReportedRecalDatum != null ) {
|
|
||||||
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
|
|
||||||
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
|
|
||||||
}
|
|
||||||
|
|
||||||
// The shift in quality due to each covariate by itself in turn
|
|
||||||
double deltaQCovariates = 0.0;
|
|
||||||
double deltaQCovariateEmpirical;
|
|
||||||
covariateCollapsedKey[0] = key[0];
|
|
||||||
covariateCollapsedKey[1] = key[1];
|
|
||||||
for( int iii = 2; iii < key.length; iii++ ) {
|
|
||||||
covariateCollapsedKey[2] = key[iii]; // The given covariate
|
|
||||||
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
|
|
||||||
if( covariateRecalDatum != null ) {
|
|
||||||
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
|
|
||||||
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
|
|
||||||
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
|
|
||||||
|
|
||||||
// Verbose printouts used to validate with old recalibrator
|
|
||||||
//if(key.contains(null)) {
|
|
||||||
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
|
|
||||||
//}
|
|
||||||
//else {
|
|
||||||
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
|
|
||||||
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
|
|
||||||
//}
|
|
||||||
|
|
||||||
//return newQualityByte;
|
|
||||||
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -68,26 +68,59 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
* Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
|
||||||
|
* but this latter functionality is now superseded by UnifiedGenotyper.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
|
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
|
||||||
* data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
|
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
|
||||||
* and mismatch statistics around the calls (turned on with --verbose). The calls can be performed from a single/pooled sample,
|
* include additional statistics such as mismatches and base qualities around the calls, read strandness (how many
|
||||||
* or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
|
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
|
||||||
* the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
|
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
|
||||||
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
|
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
|
||||||
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
|
* metrics for the post-processing tools to make the final decision). The calls are performed by default
|
||||||
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
|
* from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
|
||||||
|
* command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
|
||||||
|
* respectively. Indels are called from the tumor sample and annotated as germline
|
||||||
|
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
|
||||||
|
* if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
|
||||||
|
* is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
|
||||||
|
* in tumor sample, it will not be reported.
|
||||||
*
|
*
|
||||||
* <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
|
* To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
|
||||||
* please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
|
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
|
||||||
|
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
|
||||||
*
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* Indel calls with associated metrics.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T SomaticIndelDetector \
|
||||||
|
* -o indels.vcf \
|
||||||
|
* -verbose indels.txt
|
||||||
|
* -I:normal normal.bam \
|
||||||
|
* -I:tumor tumor.bam
|
||||||
|
* </pre>
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
|
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
|
||||||
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
// @Output
|
// @Output
|
||||||
// PrintStream out;
|
// PrintStream out;
|
||||||
@Output(doc="File to which variants should be written",required=true)
|
@Output(doc="File to write variants (indels) in VCF format",required=true)
|
||||||
protected VCFWriter vcf_writer = null;
|
protected VCFWriter vcf_writer = null;
|
||||||
|
|
||||||
@Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
|
@Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
|
||||||
|
|
@ -102,68 +135,80 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName = "genotype_intervals", shortName = "genotype",
|
@Argument(fullName = "genotype_intervals", shortName = "genotype",
|
||||||
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
|
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
|
||||||
public String genotypeIntervalsFile = null;
|
public String genotypeIntervalsFile = null;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
|
@Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
|
||||||
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
|
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
|
||||||
"if the list turns out to be unsorted, it will throw an exception. "+
|
"if the list turns out to be unsorted, it will throw an exception. "+
|
||||||
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
|
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
|
||||||
"to sort and keep it in memory (increases memory usage!).")
|
"to sort and keep it in memory (increases memory usage!).")
|
||||||
protected boolean GENOTYPE_NOT_SORTED = false;
|
protected boolean GENOTYPE_NOT_SORTED = false;
|
||||||
|
|
||||||
@Hidden
|
@Hidden
|
||||||
@Argument(fullName="unpaired", shortName="unpaired",
|
@Argument(fullName="unpaired", shortName="unpaired",
|
||||||
doc="Perform unpaired calls (no somatic status detection)", required=false)
|
doc="Perform unpaired calls (no somatic status detection)", required=false)
|
||||||
boolean call_unpaired = false;
|
boolean call_unpaired = false;
|
||||||
boolean call_somatic ;
|
boolean call_somatic ;
|
||||||
|
|
||||||
@Argument(fullName="verboseOutput", shortName="verbose",
|
@Argument(fullName="verboseOutput", shortName="verbose",
|
||||||
doc="Verbose output file in text format", required=false)
|
doc="Verbose output file in text format", required=false)
|
||||||
java.io.File verboseOutput = null;
|
java.io.File verboseOutput = null;
|
||||||
|
|
||||||
@Argument(fullName="bedOutput", shortName="bed",
|
@Argument(fullName="bedOutput", shortName="bed",
|
||||||
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
|
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
|
||||||
java.io.File bedOutput = null;
|
java.io.File bedOutput = null;
|
||||||
|
|
||||||
@Argument(fullName="minCoverage", shortName="minCoverage",
|
@Argument(fullName="minCoverage", shortName="minCoverage",
|
||||||
doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
|
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
|
||||||
int minCoverage = 6;
|
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
|
||||||
|
int minCoverage = 6;
|
||||||
|
|
||||||
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
|
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
|
||||||
doc="used only with --somatic; normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
|
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
|
||||||
int minNormalCoverage = 4;
|
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
|
||||||
|
int minNormalCoverage = 4;
|
||||||
|
|
||||||
@Argument(fullName="minFraction", shortName="minFraction",
|
@Argument(fullName="minFraction", shortName="minFraction",
|
||||||
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
|
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
|
||||||
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
|
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
|
||||||
double minFraction = 0.3;
|
double minFraction = 0.3;
|
||||||
|
|
||||||
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
|
||||||
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
|
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
|
||||||
double minConsensusFraction = 0.7;
|
"all indel observations at the site exceeds this threshold", required=false)
|
||||||
|
double minConsensusFraction = 0.7;
|
||||||
|
|
||||||
@Argument(fullName="minIndelCount", shortName="minCnt",
|
@Argument(fullName="minIndelCount", shortName="minCnt",
|
||||||
doc="Minimum count of reads supporting consensus indel required for making the call. "+
|
doc="Minimum count of reads supporting consensus indel required for making the call. "+
|
||||||
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
|
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
|
||||||
"(minIndelCount not met) will not pass.", required=false)
|
"(minIndelCount not met) will not pass.", required=false)
|
||||||
int minIndelCount = 0;
|
int minIndelCount = 0;
|
||||||
|
|
||||||
@Argument(fullName="refseq", shortName="refseq",
|
@Argument(fullName="refseq", shortName="refseq",
|
||||||
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
|
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with "+
|
||||||
String RefseqFileName = null;
|
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
|
||||||
|
String RefseqFileName = null;
|
||||||
|
|
||||||
@Argument(fullName="blacklistedLanes", shortName="BL",
|
//@Argument(fullName="blacklistedLanes", shortName="BL",
|
||||||
doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
|
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
|
||||||
"by this application, so they will not contribute indels to consider and will not be counted.", required=false)
|
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
|
||||||
PlatformUnitFilterHelper dummy;
|
//PlatformUnitFilterHelper dummy;
|
||||||
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
|
|
||||||
|
@Hidden
|
||||||
|
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
|
||||||
|
required=false) Boolean DEBUG = false;
|
||||||
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
|
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
|
||||||
"May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
|
"May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
|
||||||
|
"window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
|
||||||
|
"than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
|
||||||
|
"if window is too small",required=false) int WINDOW_SIZE = 200;
|
||||||
@Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
|
@Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
|
||||||
" the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
|
" the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
private WindowContext tumor_context;
|
private WindowContext tumor_context;
|
||||||
private WindowContext normal_context;
|
private WindowContext normal_context;
|
||||||
private int currentContigIndex = -1;
|
private int currentContigIndex = -1;
|
||||||
|
|
|
||||||
|
|
@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
|
||||||
public PhasingRead(int length, int mappingQual) {
|
public PhasingRead(int length, int mappingQual) {
|
||||||
super(length);
|
super(length);
|
||||||
|
|
||||||
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
|
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
|
||||||
|
|
||||||
this.baseProbs = new PreciseNonNegativeDouble[length];
|
this.baseProbs = new PreciseNonNegativeDouble[length];
|
||||||
Arrays.fill(this.baseProbs, null);
|
Arrays.fill(this.baseProbs, null);
|
||||||
|
|
|
||||||
|
|
@ -44,12 +44,12 @@ public class RefSeqDataParser {
|
||||||
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
|
||||||
|
|
||||||
Map<String, String> entriesToNames = new HashMap<String, String>();
|
Map<String, String> entriesToNames = new HashMap<String, String>();
|
||||||
Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
|
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
|
||||||
if (numRecords != null) {
|
if (numRecords != -1) {
|
||||||
boolean done = false;
|
boolean done = false;
|
||||||
|
|
||||||
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
|
||||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||||
if (name != null) {
|
if (name != null) {
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
entriesToNames.put(nameKeyToUse, name);
|
||||||
done = true;
|
done = true;
|
||||||
|
|
@ -59,14 +59,14 @@ public class RefSeqDataParser {
|
||||||
if (!done) {
|
if (!done) {
|
||||||
for (int i = 1; i <= numRecords; i++) {
|
for (int i = 1; i <= numRecords; i++) {
|
||||||
String key = nameKeyToUseMultiplePrefix + i;
|
String key = nameKeyToUseMultiplePrefix + i;
|
||||||
String name = vc.getAttributeAsStringNoException(key);
|
String name = vc.getAttributeAsString(key, null);
|
||||||
if (name != null)
|
if (name != null)
|
||||||
entriesToNames.put(key, name);
|
entriesToNames.put(key, name);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else { // no entry with the # of records:
|
else { // no entry with the # of records:
|
||||||
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
|
String name = vc.getAttributeAsString(nameKeyToUse, null);
|
||||||
if (name != null) {
|
if (name != null) {
|
||||||
entriesToNames.put(nameKeyToUse, name);
|
entriesToNames.put(nameKeyToUse, name);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -42,6 +42,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* <p>Body test</p>
|
* <p>Body test</p>
|
||||||
*/
|
*/
|
||||||
|
@Hidden
|
||||||
public class DocumentationTest extends RodWalker<Integer, Integer> {
|
public class DocumentationTest extends RodWalker<Integer, Integer> {
|
||||||
// the docs for the arguments are in the collection
|
// the docs for the arguments are in the collection
|
||||||
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
|
||||||
|
|
|
||||||
|
|
@ -76,6 +76,42 @@ import java.util.Map;
|
||||||
* <h2>Output</h2>
|
* <h2>Output</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* A recalibration table file in CSV format that is used by the TableRecalibration walker.
|
* A recalibration table file in CSV format that is used by the TableRecalibration walker.
|
||||||
|
* It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.
|
||||||
|
*
|
||||||
|
* The first 20 lines of such a file are shown below.
|
||||||
|
* * The file begins with a series of comment lines describing:
|
||||||
|
* ** The number of counted loci
|
||||||
|
* ** The number of counted bases
|
||||||
|
* ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
|
||||||
|
*
|
||||||
|
* * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
|
||||||
|
*
|
||||||
|
* * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
|
||||||
|
* depending on which covariates were specified at runtime. The last three items are the data, that is, the number of observations for this combination of covariates, the number of
|
||||||
|
* reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* # Counted Sites 19451059
|
||||||
|
* # Counted Bases 56582018
|
||||||
|
* # Skipped Sites 82666
|
||||||
|
* # Fraction Skipped 1 / 235 bp
|
||||||
|
* ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
|
||||||
|
* SRR006446,11,65,CA,9,1,10
|
||||||
|
* SRR006446,11,48,TA,10,0,40
|
||||||
|
* SRR006446,11,67,AA,27,0,40
|
||||||
|
* SRR006446,11,61,GA,11,1,10
|
||||||
|
* SRR006446,12,34,CA,47,1,17
|
||||||
|
* SRR006446,12,30,GA,52,1,17
|
||||||
|
* SRR006446,12,36,AA,352,1,25
|
||||||
|
* SRR006446,12,17,TA,182,11,12
|
||||||
|
* SRR006446,11,48,TG,2,0,40
|
||||||
|
* SRR006446,11,67,AG,1,0,40
|
||||||
|
* SRR006446,12,34,CG,9,0,40
|
||||||
|
* SRR006446,12,30,GG,43,0,40
|
||||||
|
* ERR001876,4,31,AG,1,0,40
|
||||||
|
* ERR001876,4,31,AT,2,2,1
|
||||||
|
* ERR001876,4,31,CA,1,0,40
|
||||||
|
* </pre>
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h2>Examples</h2>
|
* <h2>Examples</h2>
|
||||||
|
|
|
||||||
|
|
@ -61,7 +61,7 @@ import java.util.List;
|
||||||
* CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
|
* CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
|
||||||
*</pre>
|
*</pre>
|
||||||
* are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
|
* are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
|
||||||
*
|
*<pre>
|
||||||
* Valid // amplicon is valid
|
* Valid // amplicon is valid
|
||||||
* SITE_IS_FILTERED=1 // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
|
* SITE_IS_FILTERED=1 // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
|
||||||
* VARIANT_TOO_NEAR_PROBE=1 // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
|
* VARIANT_TOO_NEAR_PROBE=1 // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
|
||||||
|
|
@ -72,10 +72,10 @@ import java.util.List;
|
||||||
* END_TOO_CLOSE, // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
|
* END_TOO_CLOSE, // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
|
||||||
* NO_VARIANTS_FOUND, // no variants found within the amplicon region
|
* NO_VARIANTS_FOUND, // no variants found within the amplicon region
|
||||||
* INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
|
* INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
|
||||||
* </p>
|
* </pre></p>
|
||||||
*
|
*
|
||||||
* <h2>Examples</h2>
|
* <h2>Examples</h2>
|
||||||
* <pre></pre>
|
* <pre>
|
||||||
* java
|
* java
|
||||||
* -jar GenomeAnalysisTK.jar
|
* -jar GenomeAnalysisTK.jar
|
||||||
* -T ValidationAmplicons
|
* -T ValidationAmplicons
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,23 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* <h2>Output</h2>
|
* <h2>Output</h2>
|
||||||
* <p>
|
* <p>
|
||||||
* Evaluation tables.
|
* Evaluation tables detailing the results of the eval modules which were applied.
|
||||||
|
* For example:
|
||||||
|
* <pre>
|
||||||
|
* output.eval.gatkreport:
|
||||||
|
* ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample
|
||||||
|
* CountVariants CompRod CpG EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate ...
|
||||||
|
* CountVariants dbsnp CpG eval none all 65900028 135770 0 135770 0.00206024 ...
|
||||||
|
* CountVariants dbsnp CpG eval none known 65900028 47068 0 47068 0.00071423 ...
|
||||||
|
* CountVariants dbsnp CpG eval none novel 65900028 88702 0 88702 0.00134601 ...
|
||||||
|
* CountVariants dbsnp all eval none all 65900028 330818 0 330818 0.00502000 ...
|
||||||
|
* CountVariants dbsnp all eval none known 65900028 120685 0 120685 0.00183133 ...
|
||||||
|
* CountVariants dbsnp all eval none novel 65900028 210133 0 210133 0.00318866 ...
|
||||||
|
* CountVariants dbsnp non_CpG eval none all 65900028 195048 0 195048 0.00295976 ...
|
||||||
|
* CountVariants dbsnp non_CpG eval none known 65900028 73617 0 73617 0.00111710 ...
|
||||||
|
* CountVariants dbsnp non_CpG eval none novel 65900028 121431 0 121431 0.00184265 ...
|
||||||
|
* ...
|
||||||
|
* </pre>
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
* <h2>Examples</h2>
|
* <h2>Examples</h2>
|
||||||
|
|
@ -149,12 +165,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
|
||||||
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
|
||||||
|
|
||||||
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
|
|
||||||
private String TRANCHE_FILENAME = null;
|
|
||||||
|
|
||||||
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
|
||||||
private File ancestralAlignmentsFile = null;
|
private File ancestralAlignmentsFile = null;
|
||||||
|
|
||||||
|
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
|
||||||
|
private boolean requireStrictAlleleMatch = false;
|
||||||
|
|
||||||
// Variables
|
// Variables
|
||||||
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
|
||||||
|
|
||||||
|
|
@ -226,16 +242,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
}
|
}
|
||||||
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
|
||||||
|
|
||||||
// Add select expressions for anything in the tranches file
|
|
||||||
if ( TRANCHE_FILENAME != null ) {
|
|
||||||
// we are going to build a few select names automatically from the tranches file
|
|
||||||
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
|
|
||||||
logger.info("Adding select for all variant above the pCut of : " + t);
|
|
||||||
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
|
|
||||||
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Initialize select expressions
|
// Initialize select expressions
|
||||||
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
|
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
|
||||||
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
|
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
|
||||||
|
|
@ -245,18 +251,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
// Initialize the set of stratifications and evaluations to use
|
// Initialize the set of stratifications and evaluations to use
|
||||||
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
|
||||||
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
|
||||||
boolean usingJEXL = false;
|
|
||||||
for ( VariantStratifier vs : getStratificationObjects() ) {
|
for ( VariantStratifier vs : getStratificationObjects() ) {
|
||||||
if ( vs.getClass().getSimpleName().equals("Filter") )
|
if ( vs.getClass().getSimpleName().equals("Filter") )
|
||||||
byFilterIsEnabled = true;
|
byFilterIsEnabled = true;
|
||||||
else if ( vs.getClass().getSimpleName().equals("Sample") )
|
else if ( vs.getClass().getSimpleName().equals("Sample") )
|
||||||
perSampleIsEnabled = true;
|
perSampleIsEnabled = true;
|
||||||
usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( TRANCHE_FILENAME != null && ! usingJEXL )
|
|
||||||
throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled");
|
|
||||||
|
|
||||||
// Initialize the evaluation contexts
|
// Initialize the evaluation contexts
|
||||||
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
|
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
|
||||||
|
|
||||||
|
|
@ -378,16 +379,16 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
|
||||||
if ( matchingComps.size() == 0 )
|
if ( matchingComps.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
// find the comp which matches the alternate allele from eval
|
// find the comp which matches both the reference allele and alternate allele from eval
|
||||||
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
|
||||||
for ( VariantContext comp : matchingComps ) {
|
for ( VariantContext comp : matchingComps ) {
|
||||||
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
|
||||||
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp)) )
|
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
|
||||||
return comp;
|
return comp;
|
||||||
}
|
}
|
||||||
|
|
||||||
// if none match, just return the first one
|
// if none match, just return the first one unless we require a strict match
|
||||||
return matchingComps.get(0);
|
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
|
||||||
|
|
|
||||||
|
|
@ -22,9 +22,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
||||||
@DataPoint(description = "number of eval SNP sites")
|
@DataPoint(description = "number of eval SNP sites")
|
||||||
long nEvalVariants = 0;
|
long nEvalVariants = 0;
|
||||||
|
|
||||||
@DataPoint(description = "number of comp SNP sites")
|
|
||||||
long nCompVariants = 0;
|
|
||||||
|
|
||||||
@DataPoint(description = "number of eval sites outside of comp sites")
|
@DataPoint(description = "number of eval sites outside of comp sites")
|
||||||
long novelSites = 0;
|
long novelSites = 0;
|
||||||
|
|
||||||
|
|
@ -75,10 +72,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
boolean evalIsGood = eval != null && eval.isVariant();
|
boolean evalIsGood = eval != null && eval.isPolymorphic();
|
||||||
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
|
boolean compIsGood = comp != null && comp.isNotFiltered();
|
||||||
|
|
||||||
if (compIsGood) nCompVariants++; // count the number of comp events
|
|
||||||
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
if (evalIsGood) nEvalVariants++; // count the number of eval events
|
||||||
|
|
||||||
if (compIsGood && evalIsGood) {
|
if (compIsGood && evalIsGood) {
|
||||||
|
|
|
||||||
|
|
@ -100,21 +100,22 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||||
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
|
||||||
// added in a proxy check for monomorphic status here.
|
// added in a proxy check for monomorphic status here.
|
||||||
// Protect against the case when vc only has no-calls too - can happen if we stratify by sample and the sample has a single no-call.
|
// Protect against the case when vc only has no-calls too - can happen if we stratify by sample and the sample has a single no-call.
|
||||||
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
|
if ( vc1.isMonomorphic() ) {
|
||||||
nRefLoci++;
|
nRefLoci++;
|
||||||
} else {
|
} else {
|
||||||
switch (vc1.getType()) {
|
switch (vc1.getType()) {
|
||||||
case NO_VARIATION:
|
case NO_VARIATION:
|
||||||
|
// shouldn't get here
|
||||||
break;
|
break;
|
||||||
case SNP:
|
case SNP:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
nSNPs++;
|
nSNPs++;
|
||||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||||
break;
|
break;
|
||||||
case MNP:
|
case MNP:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
nMNPs++;
|
nMNPs++;
|
||||||
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
|
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
|
||||||
break;
|
break;
|
||||||
case INDEL:
|
case INDEL:
|
||||||
nVariantLoci++;
|
nVariantLoci++;
|
||||||
|
|
@ -136,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
|
||||||
|
|
||||||
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
String refStr = vc1.getReference().getBaseString().toUpperCase();
|
||||||
|
|
||||||
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
|
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
|
||||||
// if (aaStr.equals(".")) {
|
// if (aaStr.equals(".")) {
|
||||||
// aaStr = refStr;
|
// aaStr = refStr;
|
||||||
// }
|
// }
|
||||||
|
|
|
||||||
|
|
@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Double getPQ(Genotype gt) {
|
public static Double getPQ(Genotype gt) {
|
||||||
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
|
Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
|
||||||
|
return d == -1 ? null : d;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
|
||||||
|
|
|
||||||
|
|
@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
|
||||||
public int getComparisonOrder() { return 1; } // need only the evals
|
public int getComparisonOrder() { return 1; } // need only the evals
|
||||||
|
|
||||||
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
|
|
||||||
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
|
||||||
return vc1.toString(); // biallelic sites are output
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( vc1.isIndel() ) {
|
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
|
||||||
|
|
||||||
|
if ( ! vc1.isBiallelic() ) {
|
||||||
|
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
|
||||||
|
return vc1.toString(); // biallelic sites are output
|
||||||
|
}
|
||||||
|
|
||||||
|
// only count simple insertions/deletions, not complex indels
|
||||||
if ( vc1.isSimpleInsertion() ) {
|
if ( vc1.isSimpleInsertion() ) {
|
||||||
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
indelHistogram.update(vc1.getAlternateAllele(0).length());
|
||||||
} else if ( vc1.isSimpleDeletion() ) {
|
} else if ( vc1.isSimpleDeletion() ) {
|
||||||
indelHistogram.update(-vc1.getReference().length());
|
indelHistogram.update(-vc1.getReference().length());
|
||||||
} else {
|
|
||||||
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
|
||||||
|
|
||||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
||||||
if (eval != null ) {
|
if (eval != null && eval.isPolymorphic()) {
|
||||||
if ( indelStats == null ) {
|
if ( indelStats == null ) {
|
||||||
indelStats = new IndelStats(eval);
|
indelStats = new IndelStats(eval);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
||||||
if ( eval.hasGenotypes() )
|
if ( eval.hasGenotypes() )
|
||||||
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
|
||||||
else if ( eval.hasAttribute("AC") ) {
|
else if ( eval.hasAttribute("AC") ) {
|
||||||
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
|
ac = eval.getAttributeAsInt("AC", -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( ac != -1 ) {
|
if ( ac != -1 ) {
|
||||||
|
|
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
|
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
|
||||||
metrics.incrValue(eval);
|
metrics.incrValue(eval);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
|
||||||
}
|
}
|
||||||
|
|
||||||
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
|
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
|
||||||
return null; //no interesting sites
|
return null; //no interesting sites
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vc.hasGenotypes()) {
|
//this maps allele to a count
|
||||||
|
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
||||||
|
|
||||||
//this maps allele to a count
|
int numHetsHere = 0;
|
||||||
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
|
float numGenosHere = 0;
|
||||||
|
int numIndsHere = 0;
|
||||||
|
|
||||||
int numHetsHere = 0;
|
for (Genotype genotype : vc.getGenotypes().values()) {
|
||||||
float numGenosHere = 0;
|
numIndsHere++;
|
||||||
int numIndsHere = 0;
|
if (!genotype.isNoCall()) {
|
||||||
|
//increment stats for heterozygosity
|
||||||
|
if (genotype.isHet()) {
|
||||||
|
numHetsHere++;
|
||||||
|
}
|
||||||
|
|
||||||
for (Genotype genotype : vc.getGenotypes().values()) {
|
numGenosHere++;
|
||||||
numIndsHere++;
|
//increment stats for pairwise mismatches
|
||||||
if (!genotype.isNoCall()) {
|
|
||||||
//increment stats for heterozygosity
|
|
||||||
if (genotype.isHet()) {
|
|
||||||
numHetsHere++;
|
|
||||||
}
|
|
||||||
|
|
||||||
numGenosHere++;
|
for (Allele allele : genotype.getAlleles()) {
|
||||||
//increment stats for pairwise mismatches
|
if (allele.isNonNull() && allele.isCalled()) {
|
||||||
|
String alleleString = allele.toString();
|
||||||
for (Allele allele : genotype.getAlleles()) {
|
alleleCounts.putIfAbsent(alleleString, 0);
|
||||||
if (allele.isNonNull() && allele.isCalled()) {
|
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
||||||
String alleleString = allele.toString();
|
|
||||||
alleleCounts.putIfAbsent(alleleString, 0);
|
|
||||||
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (numGenosHere > 0) {
|
}
|
||||||
//only if have one called genotype at least
|
if (numGenosHere > 0) {
|
||||||
this.numSites++;
|
//only if have one called genotype at least
|
||||||
|
this.numSites++;
|
||||||
|
|
||||||
this.totalHet += numHetsHere / numGenosHere;
|
this.totalHet += numHetsHere / numGenosHere;
|
||||||
|
|
||||||
//compute based on num sites
|
//compute based on num sites
|
||||||
float harmonicFactor = 0;
|
float harmonicFactor = 0;
|
||||||
for (int i = 1; i <= numIndsHere; i++) {
|
for (int i = 1; i <= numIndsHere; i++) {
|
||||||
harmonicFactor += 1.0 / i;
|
harmonicFactor += 1.0 / i;
|
||||||
}
|
}
|
||||||
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
this.thetaRegionNumSites += 1.0 / harmonicFactor;
|
||||||
|
|
||||||
//now compute pairwise mismatches
|
//now compute pairwise mismatches
|
||||||
float numPairwise = 0;
|
float numPairwise = 0;
|
||||||
float numDiffs = 0;
|
float numDiffs = 0;
|
||||||
for (String allele1 : alleleCounts.keySet()) {
|
for (String allele1 : alleleCounts.keySet()) {
|
||||||
int allele1Count = alleleCounts.get(allele1);
|
int allele1Count = alleleCounts.get(allele1);
|
||||||
|
|
||||||
for (String allele2 : alleleCounts.keySet()) {
|
for (String allele2 : alleleCounts.keySet()) {
|
||||||
if (allele1.compareTo(allele2) < 0) {
|
if (allele1.compareTo(allele2) < 0) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (allele1 .compareTo(allele2) == 0) {
|
if (allele1 .compareTo(allele2) == 0) {
|
||||||
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
numPairwise += allele1Count * (allele1Count - 1) * .5;
|
||||||
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
int allele2Count = alleleCounts.get(allele2);
|
int allele2Count = alleleCounts.get(allele2);
|
||||||
numPairwise += allele1Count * allele2Count;
|
numPairwise += allele1Count * allele2Count;
|
||||||
numDiffs += allele1Count * allele2Count;
|
numDiffs += allele1Count * allele2Count;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (numPairwise > 0) {
|
if (numPairwise > 0) {
|
||||||
this.totalAvgDiffs += numDiffs / numPairwise;
|
this.totalAvgDiffs += numDiffs / numPairwise;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
||||||
}
|
}
|
||||||
|
|
||||||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
|
||||||
if (VariantContextUtils.isTransition(vc)) {
|
if (VariantContextUtils.isTransition(vc)) {
|
||||||
if (updateStandard) nTiInComp++;
|
if (updateStandard) nTiInComp++;
|
||||||
else nTi++;
|
else nTi++;
|
||||||
|
|
@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
|
||||||
else nTv++;
|
else nTv++;
|
||||||
}
|
}
|
||||||
|
|
||||||
String refStr = vc.getReference().getBaseString().toUpperCase();
|
if (vc.hasAttribute("ANCESTRALALLELE")) {
|
||||||
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
|
final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
|
||||||
|
if ( ! aaStr.equals(".") ) {
|
||||||
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
|
switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
|
||||||
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
|
case TRANSITION: nTiDerived++; break;
|
||||||
|
case TRANSVERSION: nTvDerived++; break;
|
||||||
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
|
default: break;
|
||||||
|
}
|
||||||
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
|
|
||||||
nTiDerived++;
|
|
||||||
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
|
|
||||||
nTvDerived++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||||
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
|
||||||
|
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
|
||||||
|
|
||||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||||
int ac = 0;
|
int ac = 0;
|
||||||
|
|
@ -130,10 +131,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||||
//// System.out.printf(" ac = %d%n", ac);
|
//// System.out.printf(" ac = %d%n", ac);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
|
||||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||||
} else if ( vc.hasGenotypes() ) {
|
|
||||||
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
|
||||||
} else {
|
} else {
|
||||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||||
|
|
|
||||||
|
|
@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
|
||||||
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
final String interesting = null;
|
final String interesting = null;
|
||||||
|
|
||||||
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
|
||||||
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
if( titvStats == null ) { titvStats = new TiTvStats(); }
|
||||||
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
int AC = -1;
|
int AC = -1;
|
||||||
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
|
||||||
AC = eval.getAttributeAsInt("AC");
|
AC = eval.getAttributeAsInt("AC", 0);
|
||||||
} else if ( eval.isVariant() ) {
|
} else if ( eval.isVariant() ) {
|
||||||
for (Allele allele : eval.getAlternateAlleles())
|
for (Allele allele : eval.getAlternateAlleles())
|
||||||
AC = Math.max(AC, eval.getChromosomeCount(allele));
|
AC = Math.max(AC, eval.getChromosomeCount(allele));
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
|
||||||
|
|
||||||
if (eval != null) {
|
if (eval != null) {
|
||||||
try {
|
try {
|
||||||
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
|
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return relevantStates;
|
return relevantStates;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
|
||||||
Integer frame = null;
|
Integer frame = null;
|
||||||
|
|
||||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||||
aa = eval.getAttributeAsString("refseq.variantAA");
|
aa = eval.getAttributeAsString("refseq.variantAA", null);
|
||||||
frame = eval.getAttributeAsInt("refseq.frame");
|
frame = eval.getAttributeAsInt("refseq.frame", 0);
|
||||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||||
int annotationId = 1;
|
int annotationId = 1;
|
||||||
String key;
|
String key;
|
||||||
|
|
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
|
||||||
do {
|
do {
|
||||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||||
|
|
||||||
String newtype = eval.getAttributeAsString(key);
|
String newtype = eval.getAttributeAsString(key, null);
|
||||||
|
|
||||||
if ( newtype != null &&
|
if ( newtype != null &&
|
||||||
( type == null ||
|
( type == null ||
|
||||||
|
|
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
|
||||||
type = newtype;
|
type = newtype;
|
||||||
|
|
||||||
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
String aakey = String.format("refseq.variantAA_%d", annotationId);
|
||||||
aa = eval.getAttributeAsString(aakey);
|
aa = eval.getAttributeAsString(aakey, null);
|
||||||
|
|
||||||
if (aa != null) {
|
if (aa != null) {
|
||||||
String framekey = String.format("refseq.frame_%d", annotationId);
|
String framekey = String.format("refseq.frame_%d", annotationId);
|
||||||
|
|
||||||
if (eval.hasAttribute(framekey)) {
|
if (eval.hasAttribute(framekey)) {
|
||||||
frame = eval.getAttributeAsInt(framekey);
|
frame = eval.getAttributeAsInt(framekey, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -11,25 +12,34 @@ import java.util.List;
|
||||||
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
|
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
|
||||||
*/
|
*/
|
||||||
public class FunctionalClass extends VariantStratifier {
|
public class FunctionalClass extends VariantStratifier {
|
||||||
@Override
|
|
||||||
public void initialize() {
|
public enum FunctionalType {
|
||||||
states.add("all");
|
silent,
|
||||||
states.add("silent");
|
missense,
|
||||||
states.add("missense");
|
nonsense
|
||||||
states.add("nonsense");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
@Override
|
||||||
|
public void initialize() {
|
||||||
|
states.add("all");
|
||||||
|
for ( FunctionalType type : FunctionalType.values() )
|
||||||
|
states.add(type.name());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
|
||||||
ArrayList<String> relevantStates = new ArrayList<String>();
|
ArrayList<String> relevantStates = new ArrayList<String>();
|
||||||
|
|
||||||
relevantStates.add("all");
|
relevantStates.add("all");
|
||||||
|
|
||||||
if (eval != null && eval.isVariant()) {
|
if (eval != null && eval.isVariant()) {
|
||||||
String type = null;
|
FunctionalType type = null;
|
||||||
|
|
||||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||||
type = eval.getAttributeAsString("refseq.functionalClass");
|
try {
|
||||||
|
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
|
||||||
|
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||||
int annotationId = 1;
|
int annotationId = 1;
|
||||||
String key;
|
String key;
|
||||||
|
|
@ -37,24 +47,36 @@ public class FunctionalClass extends VariantStratifier {
|
||||||
do {
|
do {
|
||||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||||
|
|
||||||
String newtype = eval.getAttributeAsString(key);
|
String newtypeStr = eval.getAttributeAsString(key, null);
|
||||||
|
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
|
||||||
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
|
try {
|
||||||
( type == null ||
|
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
|
||||||
( type.equals("silent") && !newtype.equals("silent") ) ||
|
if ( type == null ||
|
||||||
( type.equals("missense") && newtype.equals("nonsense") ) )
|
( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
|
||||||
) {
|
( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
|
||||||
type = newtype;
|
type = newType;
|
||||||
|
}
|
||||||
|
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||||
}
|
}
|
||||||
|
|
||||||
annotationId++;
|
annotationId++;
|
||||||
} while (eval.hasAttribute(key));
|
} while (eval.hasAttribute(key));
|
||||||
|
|
||||||
|
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) {
|
||||||
|
try {
|
||||||
|
SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString());
|
||||||
|
if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE )
|
||||||
|
type = FunctionalType.nonsense;
|
||||||
|
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE )
|
||||||
|
type = FunctionalType.missense;
|
||||||
|
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT )
|
||||||
|
type = FunctionalType.silent;
|
||||||
|
}
|
||||||
|
catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type != null) {
|
if ( type != null ) {
|
||||||
if (type.equals("silent")) { relevantStates.add("silent"); }
|
relevantStates.add(type.name());
|
||||||
else if (type.equals("missense")) { relevantStates.add("missense"); }
|
|
||||||
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -277,7 +277,7 @@ public class VariantEvalUtils {
|
||||||
* @return a new VariantContext with just the requested samples
|
* @return a new VariantContext with just the requested samples
|
||||||
*/
|
*/
|
||||||
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
|
||||||
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
|
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
|
||||||
|
|
||||||
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
|
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
|
||||||
|
|
||||||
|
|
@ -354,7 +354,7 @@ public class VariantEvalUtils {
|
||||||
|
|
||||||
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
|
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
|
||||||
if ( !mappings.containsKey(sample) )
|
if ( !mappings.containsKey(sample) )
|
||||||
mappings.put(sample, new HashSet<VariantContext>());
|
mappings.put(sample, new LinkedHashSet<VariantContext>());
|
||||||
mappings.get(sample).add(vc);
|
mappings.get(sample).add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,76 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011 The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||||
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: rpoplin
|
||||||
|
* Date: 3/12/11
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class TrainingSet {
|
||||||
|
|
||||||
|
public RodBinding<VariantContext> rodBinding;
|
||||||
|
public boolean isKnown = false;
|
||||||
|
public boolean isTraining = false;
|
||||||
|
public boolean isAntiTraining = false;
|
||||||
|
public boolean isTruth = false;
|
||||||
|
public boolean isConsensus = false;
|
||||||
|
public double prior = 0.0;
|
||||||
|
|
||||||
|
protected final static Logger logger = Logger.getLogger(TrainingSet.class);
|
||||||
|
|
||||||
|
public TrainingSet( final RodBinding<VariantContext> rodBinding) {
|
||||||
|
this.rodBinding = rodBinding;
|
||||||
|
|
||||||
|
final Tags tags = rodBinding.getTags();
|
||||||
|
final String name = rodBinding.getName();
|
||||||
|
|
||||||
|
// Parse the tags to decide which tracks have which properties
|
||||||
|
if( tags != null ) {
|
||||||
|
isKnown = tags.containsKey("known") && tags.getValue("known").equals("true");
|
||||||
|
isTraining = tags.containsKey("training") && tags.getValue("training").equals("true");
|
||||||
|
isAntiTraining = tags.containsKey("bad") && tags.getValue("bad").equals("true");
|
||||||
|
isTruth = tags.containsKey("truth") && tags.getValue("truth").equals("true");
|
||||||
|
isConsensus = tags.containsKey("consensus") && tags.getValue("consensus").equals("true");
|
||||||
|
prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior );
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report back to the user which tracks were found and the properties that were detected
|
||||||
|
if( !isConsensus && !isAntiTraining ) {
|
||||||
|
logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", name, isKnown, isTraining, isTruth, prior) );
|
||||||
|
} else if( isConsensus ) {
|
||||||
|
logger.info( String.format( "Found consensus track: %s", name) );
|
||||||
|
} else {
|
||||||
|
logger.info( String.format( "Found bad sites training track: %s", name) );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue