Merge remote-tracking branch 'unstable/master'
This commit is contained in:
commit
3cd9f3fe81
477
build.xml
477
build.xml
|
|
@ -43,16 +43,17 @@
|
||||||
<property name="scala.classes" value="${build.dir}/scala/classes" />
|
<property name="scala.classes" value="${build.dir}/scala/classes" />
|
||||||
|
|
||||||
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
|
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
|
||||||
|
|
||||||
|
<property name="javadoc.dir" value="javadoc" />
|
||||||
|
<property name="scaladoc.dir" value="scaladoc" />
|
||||||
|
|
||||||
<!-- Contracts for Java -->
|
<!-- Contracts for Java -->
|
||||||
<!-- To disable, run with -Duse.contracts=false -->
|
<!-- By default, enabled only for test targets -->
|
||||||
<property name="use.contracts" value="true" />
|
<!-- To disable for test targets, run with -Duse.contracts=false -->
|
||||||
|
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
|
||||||
<property name="java.contracts" value="${build.dir}/java/contracts" />
|
<property name="java.contracts" value="${build.dir}/java/contracts" />
|
||||||
<property name="contracts.version" value="1.0-20110609" />
|
<property name="contracts.version" value="1.0-20110609" />
|
||||||
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
|
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
|
||||||
|
|
||||||
<!-- where to find the tribble distro -->
|
|
||||||
<property name="tribble.dir" value="tribble" />
|
|
||||||
|
|
||||||
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
|
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
|
||||||
<property name="findbugs.home" value="./findbugs"/>
|
<property name="findbugs.home" value="./findbugs"/>
|
||||||
|
|
@ -79,6 +80,16 @@
|
||||||
<patternset refid="java.source.pattern" />
|
<patternset refid="java.source.pattern" />
|
||||||
</fileset>
|
</fileset>
|
||||||
|
|
||||||
|
<!-- Workaround so gatkdocs sees every Java source file: a single pattern covering both the public and private source directories -->
|
||||||
|
<patternset id="all.java.source.pattern">
|
||||||
|
<include name="${java.public.source.dir}/**/*.java" />
|
||||||
|
<include name="${java.private.source.dir}/**/*.java" />
|
||||||
|
</patternset>
|
||||||
|
|
||||||
|
<fileset id="all.java.source.files" dir="${basedir}">
|
||||||
|
<patternset refid="all.java.source.pattern" />
|
||||||
|
</fileset>
|
||||||
|
|
||||||
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
|
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
|
||||||
<include name="**/*.java" />
|
<include name="**/*.java" />
|
||||||
</fileset>
|
</fileset>
|
||||||
|
|
@ -160,6 +171,16 @@
|
||||||
<property name="scala.target" value="core"/>
|
<property name="scala.target" value="core"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="init.buildpublic">
|
||||||
|
<!-- Set the properties needed to build public only -->
|
||||||
|
<property name="gatk.target" value="core"/>
|
||||||
|
<property name="scala.target" value="core"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="init.usecontracts"> <!-- Sets use.contracts to true for the targets that run after this one. -->
|
||||||
|
<property name="use.contracts" value="true" /> <!-- Ant properties are write-once, so a -Duse.contracts=... value given on the command line is not overridden here. -->
|
||||||
|
</target>
|
||||||
|
|
||||||
<target name="git.describe">
|
<target name="git.describe">
|
||||||
<exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
|
<exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
|
||||||
<arg line="describe" />
|
<arg line="describe" />
|
||||||
|
|
@ -234,6 +255,7 @@
|
||||||
|
|
||||||
<!-- Create the build directory structure used by compile -->
|
<!-- Create the build directory structure used by compile -->
|
||||||
<mkdir dir="${build.dir}"/>
|
<mkdir dir="${build.dir}"/>
|
||||||
|
<mkdir dir="${lib.dir}"/>
|
||||||
<mkdir dir="${java.classes}"/>
|
<mkdir dir="${java.classes}"/>
|
||||||
<mkdir dir="${java.contracts}"/>
|
<mkdir dir="${java.contracts}"/>
|
||||||
|
|
||||||
|
|
@ -265,7 +287,7 @@
|
||||||
</taskdef>
|
</taskdef>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="gatk.compile.public.source" depends="tribble,init,resolve">
|
<target name="gatk.compile.public.source" depends="init,resolve">
|
||||||
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
|
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
|
||||||
<compilerarg value="-proc:none"/>
|
<compilerarg value="-proc:none"/>
|
||||||
</javac>
|
</javac>
|
||||||
|
|
@ -326,7 +348,7 @@
|
||||||
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
|
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
|
||||||
description="create GATK contracts" if="include.contracts" />
|
description="create GATK contracts" if="include.contracts" />
|
||||||
|
|
||||||
<target name="gatk.compile" depends="tribble,init,resolve,gatk.compile.source,gatk.contracts" />
|
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
|
||||||
|
|
||||||
<target name="init.queue-extensions.generate" depends="gatk.compile">
|
<target name="init.queue-extensions.generate" depends="gatk.compile">
|
||||||
<condition property="uptodate.queue-extensions.generate">
|
<condition property="uptodate.queue-extensions.generate">
|
||||||
|
|
@ -457,6 +479,35 @@
|
||||||
</javadoc>
|
</javadoc>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<target name="clean.gatkdocs">
|
||||||
|
<delete dir="gatkdocs"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="gatkdocs" depends="gatk.compile"
|
||||||
|
description="Extract help key/value pair file from the JavaDoc tags."> <!-- Runs the GATKDoclet over the all.java.source.files fileset; the doclet itself is loaded from the compiled classes produced by gatk.compile. -->
|
||||||
|
<path id="doclet.classpath">
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.classes}" />
|
||||||
|
</path>
|
||||||
|
|
||||||
|
<!-- Run with -Dgatkdocs.include.hidden=true to include documentation for hidden features. The check below is isset, so setting the property to any value (even false) enables it. -->
|
||||||
|
<condition property="gatkdocs.include.hidden.arg" value="-include-hidden" else="">
|
||||||
|
<isset property="gatkdocs.include.hidden" />
|
||||||
|
</condition>
|
||||||
|
|
||||||
|
<javadoc doclet="org.broadinstitute.sting.utils.help.GATKDoclet"
|
||||||
|
docletpathref="doclet.classpath"
|
||||||
|
classpathref="external.dependencies"
|
||||||
|
classpath="${java.classes}"
|
||||||
|
additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet"> <!-- Append -test to only do DocumentationTest walker. The quotes around the timestamp must stay escaped as &quot; inside this attribute. To debug the doclet, append: -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005 (not on by default, since it opens port 5005 on every docs build). -->
|
||||||
|
<sourcefiles>
|
||||||
|
<union>
|
||||||
|
<fileset refid="all.java.source.files"/>
|
||||||
|
</union>
|
||||||
|
</sourcefiles>
|
||||||
|
</javadoc>
|
||||||
|
</target>
|
||||||
|
|
||||||
<target name="sting.compile" depends="gatk.compile, scala.compile" />
|
<target name="sting.compile" depends="gatk.compile, scala.compile" />
|
||||||
|
|
||||||
<target name="init.jar" depends="sting.compile,extracthelp">
|
<target name="init.jar" depends="sting.compile,extracthelp">
|
||||||
|
|
@ -490,6 +541,7 @@
|
||||||
<include name="**/utils/codecs/**/*.class"/>
|
<include name="**/utils/codecs/**/*.class"/>
|
||||||
<include name="**/utils/variantcontext/**/*.class"/>
|
<include name="**/utils/variantcontext/**/*.class"/>
|
||||||
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
||||||
|
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
</jar>
|
</jar>
|
||||||
</target>
|
</target>
|
||||||
|
|
@ -657,54 +709,6 @@
|
||||||
</antcall>
|
</antcall>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="test.init.compile">
|
|
||||||
<mkdir dir="${java.test.classes}"/>
|
|
||||||
<mkdir dir="${scala.test.classes}"/>
|
|
||||||
<antcall target="resolve">
|
|
||||||
<param name="ivy.conf" value="test"/>
|
|
||||||
</antcall>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
|
|
||||||
<echo message="Sting: Compiling test cases!"/>
|
|
||||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
|
||||||
<src path="${java.public.test.sources}"/>
|
|
||||||
<src path="${java.private.test.sources}"/>
|
|
||||||
<classpath>
|
|
||||||
<path refid="external.dependencies" />
|
|
||||||
<pathelement location="${java.classes}"/>
|
|
||||||
<pathelement location="${java.contracts}"/>
|
|
||||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
</classpath>
|
|
||||||
<compilerarg value="-proc:none"/>
|
|
||||||
<!--
|
|
||||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
|
||||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
|
||||||
-->
|
|
||||||
</javac>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
|
||||||
<echo message="Scala: Compiling test cases!"/>
|
|
||||||
<antcall target="resolve">
|
|
||||||
<param name="ivy.conf" value="test"/>
|
|
||||||
</antcall>
|
|
||||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
|
||||||
<src path="${scala.public.test.sources}" />
|
|
||||||
<src path="${scala.private.test.sources}" />
|
|
||||||
<include name="**/*.scala"/>
|
|
||||||
<classpath>
|
|
||||||
<path refid="scala.dependencies"/>
|
|
||||||
<pathelement location="${scala.test.classes}"/>
|
|
||||||
<pathelement location="${java.test.classes}"/>
|
|
||||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
</classpath>
|
|
||||||
</scalac>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="test.compile" depends="test.java.compile,test.scala.compile">
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- new scala target -->
|
<!-- new scala target -->
|
||||||
|
|
||||||
<target name="scala" description="build the scala directory">
|
<target name="scala" description="build the scala directory">
|
||||||
|
|
@ -718,20 +722,113 @@
|
||||||
<!-- ***************************************************************************** -->
|
<!-- ***************************************************************************** -->
|
||||||
<!-- where to put reports and tests-->
|
<!-- where to put reports and tests-->
|
||||||
<property name="report" value="${build.dir}/report"/>
|
<property name="report" value="${build.dir}/report"/>
|
||||||
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
|
||||||
<property name="test.output" value="${dist.dir}/test"/>
|
<property name="test.output" value="${dist.dir}/test"/>
|
||||||
<property name="java.public.test.sources" value="public/java/test"/>
|
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
||||||
<property name="java.private.test.sources" value="private/java/test"/>
|
<property name="java.public.test.classes" value="${java.test.classes}/public"/>
|
||||||
|
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
|
||||||
|
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
|
||||||
|
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
|
||||||
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
||||||
<property name="scala.public.test.sources" value="public/scala/test"/>
|
<property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
|
||||||
<property name="scala.private.test.sources" value="private/scala/test"/>
|
<property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
|
||||||
|
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
|
||||||
|
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
|
||||||
|
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
|
||||||
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
||||||
<property name="test.maxmemory" value="4g"/>
|
<property name="test.maxmemory" value="4g"/>
|
||||||
|
|
||||||
|
<target name="test.init.compile">
|
||||||
|
<mkdir dir="${java.test.classes}"/>
|
||||||
|
<mkdir dir="${scala.test.classes}"/>
|
||||||
|
<antcall target="resolve">
|
||||||
|
<param name="ivy.conf" value="test"/>
|
||||||
|
</antcall>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.public.compile" depends="dist,test.init.compile">
|
||||||
|
<mkdir dir="${java.public.test.classes}"/>
|
||||||
|
<echo message="Sting: Compiling public test cases!"/>
|
||||||
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
|
<src path="${java.public.test.sources}"/>
|
||||||
|
<classpath>
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.classes}"/>
|
||||||
|
<pathelement location="${java.contracts}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
<compilerarg value="-proc:none"/>
|
||||||
|
<!--
|
||||||
|
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||||
|
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||||
|
-->
|
||||||
|
</javac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private"> <!-- Compiles the private Java tests into their own output directory; the body is skipped unless include.private is set. -->
|
||||||
|
<mkdir dir="${java.private.test.classes}"/>
|
||||||
|
<echo message="Sting: Compiling private test cases!"/>
|
||||||
|
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||||
|
<src path="${java.private.test.sources}"/>
|
||||||
|
<classpath>
|
||||||
|
<path refid="external.dependencies" />
|
||||||
|
<pathelement location="${java.public.test.classes}"/> <!-- The compiled public test classes are on the classpath, so private tests can reference them. -->
|
||||||
|
<pathelement location="${java.classes}"/>
|
||||||
|
<pathelement location="${java.contracts}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
<compilerarg value="-proc:none"/> <!-- -proc:none turns off annotation processing for this javac run. -->
|
||||||
|
<!--
|
||||||
|
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||||
|
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||||
|
-->
|
||||||
|
</javac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
|
||||||
|
|
||||||
|
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
||||||
|
<mkdir dir="${scala.public.test.classes}"/>
|
||||||
|
<echo message="Scala: Compiling public test cases!"/>
|
||||||
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
|
||||||
|
<src path="${scala.public.test.sources}" />
|
||||||
|
<classpath>
|
||||||
|
<path refid="scala.dependencies"/>
|
||||||
|
<pathelement location="${java.public.test.classes}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
</scalac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
|
||||||
|
<mkdir dir="${scala.private.test.classes}"/>
|
||||||
|
<echo message="Scala: Compiling private test cases!"/>
|
||||||
|
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
|
||||||
|
<src path="${scala.private.test.sources}" />
|
||||||
|
<classpath>
|
||||||
|
<path refid="scala.dependencies"/>
|
||||||
|
<pathelement location="${scala.public.test.classes}"/>
|
||||||
|
<pathelement location="${java.public.test.classes}"/>
|
||||||
|
<pathelement location="${java.private.test.classes}"/>
|
||||||
|
<pathelement location="${testng.jar}"/>
|
||||||
|
</classpath>
|
||||||
|
</scalac>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
|
||||||
|
|
||||||
|
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
|
||||||
|
|
||||||
<!-- TEST -->
|
<!-- TEST -->
|
||||||
<macrodef name="run-test">
|
<macrodef name="run-test">
|
||||||
<attribute name="testtype"/>
|
<attribute name="testtype"/>
|
||||||
|
<attribute name="outputdir"/>
|
||||||
|
<attribute name="runfailed"/>
|
||||||
|
|
||||||
<sequential>
|
<sequential>
|
||||||
|
<condition property="run.failed.tests">
|
||||||
|
<equals arg1="@{runfailed}" arg2="true"/>
|
||||||
|
</condition>
|
||||||
|
|
||||||
<!-- Get the pipeline run type. Default to dry. -->
|
<!-- Get the pipeline run type. Default to dry. -->
|
||||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||||
|
|
@ -741,10 +838,10 @@
|
||||||
<isset property="include.contracts" />
|
<isset property="include.contracts" />
|
||||||
</condition>
|
</condition>
|
||||||
|
|
||||||
<mkdir dir="${report}/@{testtype}"/>
|
<mkdir dir="@{outputdir}"/>
|
||||||
<echo message="Sting: Running @{testtype} test cases!"/>
|
<echo message="Sting: Running @{testtype} test cases!"/>
|
||||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
|
||||||
<testng outputDir="${report}/@{testtype}"
|
<testng outputDir="@{outputdir}"
|
||||||
haltOnFailure="false" failureProperty="test.failure"
|
haltOnFailure="false" failureProperty="test.failure"
|
||||||
verbose="2"
|
verbose="2"
|
||||||
workingDir="${basedir}"
|
workingDir="${basedir}"
|
||||||
|
|
@ -762,199 +859,163 @@
|
||||||
<pathelement location="${java.classes}" />
|
<pathelement location="${java.classes}" />
|
||||||
<pathelement location="${scala.classes}" />
|
<pathelement location="${scala.classes}" />
|
||||||
<pathelement location="${java.contracts}" />
|
<pathelement location="${java.contracts}" />
|
||||||
<pathelement location="${java.test.classes}" />
|
<pathelement location="${java.public.test.classes}" />
|
||||||
<pathelement location="${scala.test.classes}" />
|
<pathelement location="${java.private.test.classes}" />
|
||||||
|
<pathelement location="${scala.public.test.classes}" />
|
||||||
|
<pathelement location="${scala.private.test.classes}" />
|
||||||
</classpath>
|
</classpath>
|
||||||
|
|
||||||
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
|
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
|
||||||
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
|
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}.class" if="include.private"/>
|
||||||
|
</classfileset>
|
||||||
|
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}*.class" if="scala.include"/>
|
||||||
|
</classfileset>
|
||||||
|
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
|
||||||
|
<include name="**/@{testtype}*.class" if="include.scala.private"/>
|
||||||
|
</classfileset>
|
||||||
|
|
||||||
|
<xmlfileset dir="${basedir}">
|
||||||
|
<include name="@{testtype}" if="run.failed.tests"/>
|
||||||
|
</xmlfileset>
|
||||||
</testng>
|
</testng>
|
||||||
|
|
||||||
<!-- generate a report for Bamboo or Hudson to read in -->
|
<!-- generate a report for Bamboo or Hudson to read in -->
|
||||||
<junitreport todir="${report}/@{testtype}">
|
<junitreport todir="@{outputdir}">
|
||||||
<fileset dir="${report}/@{testtype}">
|
<fileset dir="@{outputdir}">
|
||||||
<include name="*/*.xml"/>
|
<include name="*/*.xml"/>
|
||||||
</fileset>
|
</fileset>
|
||||||
<report format="noframes" todir="${report}/@{testtype}"/>
|
<report format="noframes" todir="@{outputdir}"/>
|
||||||
</junitreport>
|
</junitreport>
|
||||||
<fail message="test failed" if="test.failure" />
|
|
||||||
</sequential>
|
|
||||||
</macrodef>
|
|
||||||
|
|
||||||
<!-- FAILED-TEST -->
|
|
||||||
<macrodef name="run-failed-test">
|
|
||||||
<attribute name="xmlfailedtestfile" />
|
|
||||||
<sequential>
|
|
||||||
<!-- Get the pipeline run type. Default to dry. -->
|
|
||||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
|
||||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
|
||||||
</condition>
|
|
||||||
|
|
||||||
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
|
|
||||||
<isset property="include.contracts" />
|
|
||||||
</condition>
|
|
||||||
|
|
||||||
<mkdir dir="${report}/failed_rerun" />
|
|
||||||
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
|
|
||||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
|
||||||
<testng outputDir="${report}/failed_rerun"
|
|
||||||
haltOnFailure="false" failureProperty="test.failure"
|
|
||||||
verbose="2"
|
|
||||||
workingDir="${basedir}"
|
|
||||||
useDefaultListeners="false"
|
|
||||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
|
|
||||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
|
||||||
<jvmarg value="-Djava.awt.headless=true" />
|
|
||||||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
|
||||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
|
||||||
<jvmarg line="${cofoja.jvm.args}"/>
|
|
||||||
<!-- <jvmarg value="-Xdebug"/> -->
|
|
||||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
|
||||||
<classpath>
|
|
||||||
<path refid="external.dependencies" />
|
|
||||||
<pathelement location="${java.classes}" />
|
|
||||||
<pathelement location="${scala.classes}" />
|
|
||||||
<pathelement location="${java.contracts}" />
|
|
||||||
<pathelement location="${java.test.classes}" />
|
|
||||||
<pathelement location="${scala.test.classes}" />
|
|
||||||
</classpath>
|
|
||||||
|
|
||||||
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
|
|
||||||
</testng>
|
|
||||||
|
|
||||||
<fail message="test failed" if="test.failure" />
|
<fail message="test failed" if="test.failure" />
|
||||||
</sequential>
|
</sequential>
|
||||||
</macrodef>
|
</macrodef>
|
||||||
|
|
||||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
<target name="alltests">
|
||||||
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
|
<antcall target="test" inheritAll="false"/>
|
||||||
|
<antcall target="integrationtest" inheritAll="false"/>
|
||||||
|
<antcall target="pipelinetest" inheritAll="false"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<target name="alltests.public">
|
||||||
|
<antcall target="test.public" inheritAll="false"/>
|
||||||
|
<antcall target="integrationtest.public" inheritAll="false"/>
|
||||||
|
<antcall target="pipelinetest.public" inheritAll="false"/>
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
|
||||||
|
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
|
||||||
<condition property="ttype" value="*UnitTest" else="${single}">
|
<condition property="ttype" value="*UnitTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${ttype}"/>
|
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="integrationtest" depends="test.compile" description="Run integration tests">
|
<target name="test.public" depends="init.buildpublic,test"/>
|
||||||
|
|
||||||
|
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
|
||||||
<condition property="itype" value="*IntegrationTest" else="${single}">
|
<condition property="itype" value="*IntegrationTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${itype}"/>
|
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="performancetest" depends="test.compile" description="Run performance tests">
|
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
|
||||||
|
|
||||||
|
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
|
||||||
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${ptype}"/>
|
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
|
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
|
||||||
|
|
||||||
|
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${pipetype}"/>
|
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||||
</target>
|
</target>
|
||||||
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
|
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
|
||||||
|
|
||||||
|
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||||
<property name="pipeline.run" value="run"/>
|
<property name="pipeline.run" value="run"/>
|
||||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||||
<not><isset property="single"/></not>
|
<not><isset property="single"/></not>
|
||||||
</condition>
|
</condition>
|
||||||
<run-test testtype="${pipetype}"/>
|
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||||
|
</target>
|
||||||
|
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
|
||||||
|
|
||||||
|
<target name="failed-test" depends="init.buildall,test.compile">
|
||||||
|
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-test" depends="test.compile">
|
<target name="failed-integration" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
|
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-integration" depends="test.compile">
|
<target name="failed-performance" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
|
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-performance" depends="test.compile">
|
<target name="failed-pipeline" depends="init.buildall,test.compile">
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
|
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="failed-pipeline" depends="test.compile">
|
<!-- ******************************************************************************** -->
|
||||||
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
|
<!-- Javadoc -->
|
||||||
</target>
|
<!-- ******************************************************************************** -->
|
||||||
|
|
||||||
<!-- ***************************************************************************** -->
|
|
||||||
<!-- *********** Tribble ********* -->
|
|
||||||
<!-- ***************************************************************************** -->
|
|
||||||
<target name="tribble.init" description="checks if tribble is available to build from source">
|
|
||||||
<condition property="tribble.compile.exists">
|
|
||||||
<available file="${tribble.dir}/build.xml"/>
|
|
||||||
</condition>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- compile the library -->
|
|
||||||
<target name="tribble.compile" description="compiles the tribble library" depends="tribble.init" if="tribble.compile.exists">
|
|
||||||
<echo message="Building the Tribble Library..."/>
|
|
||||||
<ant antfile="build.xml" target="all" dir="${tribble.dir}" inheritAll="false"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- copy the compiled library -->
|
|
||||||
<target name="tribble.compile.copy" description="Copies the compiled tribble library" depends="tribble.compile" if="tribble.compile.exists">
|
|
||||||
<copy todir="${lib.dir}">
|
|
||||||
<fileset dir="${tribble.dir}/dist" includes="*.jar"/>
|
|
||||||
</copy>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- copy the precompiled library -->
|
|
||||||
<target name="tribble.library.copy" description="Copies the precompiled tribble library" depends="tribble.init" unless="tribble.compile.exists">
|
|
||||||
<echo message="Copying the Tribble Library..."/>
|
|
||||||
<copy todir="${lib.dir}">
|
|
||||||
<fileset dir="settings/repository/org.broad" includes="tribble*.jar"/>
|
|
||||||
</copy>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<target name="tribble" description="Copies the tribble jar" depends="tribble.compile.copy,tribble.library.copy"/>
|
|
||||||
|
|
||||||
<target name="tribble.test.init" description="runs the tribble tests" depends="tribble.init">
|
|
||||||
<condition property="tribble.test.run">
|
|
||||||
<and>
|
|
||||||
<isset property="tribble.compile.exists"/>
|
|
||||||
<not><isset property="single"/></not>
|
|
||||||
</and>
|
|
||||||
</condition>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- test tribble using the unit tests set in tribble -->
|
|
||||||
<target name="tribble.test" description="runs the tribble tests" depends="tribble.test.init,tribble.compile" if="tribble.test.run">
|
|
||||||
<echo message="Testing the Tribble Library..."/>
|
|
||||||
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
|
|
||||||
</target>
|
|
||||||
|
|
||||||
<!-- clean tribble -->
|
|
||||||
<target name="tribble.clean" description="cleans the tribble library" depends="tribble.init" if="tribble.compile.exists">
|
|
||||||
<echo message="Cleaning the Tribble Library..."/>
|
|
||||||
<ant antfile="build.xml" target="clean" dir="${tribble.dir}" inheritAll="false"/>
|
|
||||||
</target>
|
|
||||||
<!-- ***************************************************************************** -->
|
|
||||||
|
|
||||||
<target name="clean.javadoc">
|
<target name="clean.javadoc">
|
||||||
<delete dir="javadoc"/>
|
<delete dir="${javadoc.dir}" />
|
||||||
<delete dir="scaladoc"/>
|
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc">
|
<target name="init.javadoc">
|
||||||
<mkdir dir="javadoc"/>
|
<mkdir dir="${javadoc.dir}" />
|
||||||
<javadoc destdir="javadoc"
|
</target>
|
||||||
classpathref="external.dependencies">
|
|
||||||
<sourcepath path="${java.public.source.dir}"/>
|
<target name="javadoc" depends="init.buildpublic,generate.javadoc" description="Generates public javadoc" />
|
||||||
<sourcepath path="${external.dir}"/>
|
|
||||||
|
<target name="javadoc.private" depends="init.buildall,generate.javadoc" description="Generates public and private javadoc" />
|
||||||
|
|
||||||
|
<target name="generate.javadoc" depends="init.javadoc,resolve">
|
||||||
|
<javadoc destdir="${javadoc.dir}" classpathref="external.dependencies">
|
||||||
|
<fileset refid="java.source.files" />
|
||||||
|
<sourcepath path="${external.dir}" />
|
||||||
</javadoc>
|
</javadoc>
|
||||||
<javadoc destdir="javadoc"
|
</target>
|
||||||
classpathref="external.dependencies">
|
|
||||||
<sourcepath path="${java.private.source.dir}"/>
|
<!-- ******************************************************************************** -->
|
||||||
<exclude name="**" unless="include.private" />
|
<!-- Scaladoc -->
|
||||||
</javadoc>
|
<!-- ******************************************************************************** -->
|
||||||
<mkdir dir="scaladoc"/>
|
|
||||||
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
<target name="clean.scaladoc">
|
||||||
<src path="${scala.public.source.dir}"/>
|
<delete dir="${scaladoc.dir}" />
|
||||||
<src path="${scala.private.source.dir}"/>
|
</target>
|
||||||
<src path="${queue-extensions.source.dir}"/>
|
|
||||||
<include name="**/*.scala"/>
|
<target name="init.scaladoc">
|
||||||
|
<mkdir dir="${scaladoc.dir}" />
|
||||||
|
</target>
|
||||||
|
|
||||||
|
<!-- NOTE: the scaladoc targets require that the environment variable ANT_OPTS has been set to "-Xmx1G" -->
|
||||||
|
|
||||||
|
<target name="scaladoc" depends="init.buildpublic,generate.scaladoc" description="Generates public scaladoc -- set ANT_OPTS to -Xmx1G" />
|
||||||
|
|
||||||
|
<target name="scaladoc.private" depends="init.buildall,generate.scaladoc" description="Generates public and private scaladoc -- set ANT_OPTS to -Xmx1G" />
|
||||||
|
|
||||||
|
<target name="generate.scaladoc" depends="resolve,queue-extensions.generate,init.scala.compile,scala.compile,init.scaladoc">
|
||||||
|
<scaladoc srcdir="${basedir}" destdir="${scaladoc.dir}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||||
|
<include name="${scala.public.source.dir}/**/*.scala" />
|
||||||
|
<include name="${queue-extensions.source.dir}/**/*.scala" />
|
||||||
|
<include name="${scala.private.source.dir}/**/*.scala" if="include.private" />
|
||||||
</scaladoc>
|
</scaladoc>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
|
<!-- ******************************************************************************** -->
|
||||||
|
<!-- Release-related tasks -->
|
||||||
|
<!-- ******************************************************************************** -->
|
||||||
|
|
||||||
<!-- Unzip all classes from their current locations and assemble them in a staging directory -->
|
<!-- Unzip all classes from their current locations and assemble them in a staging directory -->
|
||||||
<target name="stage" description="stage files for distribution">
|
<target name="stage" description="stage files for distribution">
|
||||||
<mkdir dir="staging"/>
|
<mkdir dir="staging"/>
|
||||||
|
|
@ -1044,7 +1105,7 @@
|
||||||
</findbugs>
|
</findbugs>
|
||||||
</target>
|
</target>
|
||||||
|
|
||||||
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc">
|
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
|
||||||
<delete dir="out"/>
|
<delete dir="out"/>
|
||||||
<delete dir="${build.dir}"/>
|
<delete dir="${build.dir}"/>
|
||||||
<delete dir="${lib.dir}"/>
|
<delete dir="${lib.dir}"/>
|
||||||
|
|
|
||||||
6
ivy.xml
6
ivy.xml
|
|
@ -12,6 +12,9 @@
|
||||||
<dependency org="net.sf" name="picard" rev="latest.integration"/>
|
<dependency org="net.sf" name="picard" rev="latest.integration"/>
|
||||||
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
|
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
|
||||||
|
|
||||||
|
<!-- Tribble -->
|
||||||
|
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||||
|
|
||||||
<dependency org="log4j" name="log4j" rev="1.2.15">
|
<dependency org="log4j" name="log4j" rev="1.2.15">
|
||||||
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
||||||
<exclude org="javax.mail" />
|
<exclude org="javax.mail" />
|
||||||
|
|
@ -30,6 +33,9 @@
|
||||||
|
|
||||||
<!-- Dependencies for the graph aligner -->
|
<!-- Dependencies for the graph aligner -->
|
||||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||||
|
|
||||||
|
<!-- Dependencies for the html walker documentation -->
|
||||||
|
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
|
||||||
|
|
||||||
<!-- Commons Dependencies -->
|
<!-- Commons Dependencies -->
|
||||||
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,169 @@
|
||||||
|
library(gsalib)
|
||||||
|
require("ggplot2")
|
||||||
|
require("gplots")
|
||||||
|
|
||||||
|
#
|
||||||
|
# Standard command line switch.  Can be loaded interactively for development
|
||||||
|
# or executed with RScript
|
||||||
|
#
|
||||||
|
args = commandArgs(TRUE)
|
||||||
|
onCMDLine = ! is.na(args[1])
|
||||||
|
if ( onCMDLine ) {
|
||||||
|
inputFileName = args[1]
|
||||||
|
outputPDF = args[2]
|
||||||
|
} else {
|
||||||
|
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||||
|
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||||
|
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||||
|
outputPDF = NA
|
||||||
|
}
|
||||||
|
|
||||||
|
RUNTIME_UNITS = "(sec)"
|
||||||
|
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
||||||
|
|
||||||
|
#
|
||||||
|
# Helper function to aggregate all of the jobs in the report across all tables
|
||||||
|
#
|
||||||
|
allJobsFromReport <- function(report) {
  # Stacks the per-job columns shared by every table of the report into a
  # single data frame.
  #
  # report: a list of tables (data frames); each one is indexed by the
  #         five column names below, so each must provide them.
  # Returns the row-wise concatenation (rbind) of the selected columns.
  jobColumns <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
  keepJobColumns <- function(tbl) tbl[, jobColumns]
  do.call("rbind", lapply(report, keepJobColumns))
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||||
|
#
|
||||||
|
plotJobsGantt <- function(gatkReport, sortOverall) {
  # Draws a Gantt-style chart: one horizontal segment per job, running from
  # its start time to its done time, both measured relative to the earliest
  # start time in the report.
  #
  # gatkReport:  list of report tables, passed to allJobsFromReport().
  # sortOverall: if true, rows are ordered by analysis name and then start
  #              time; otherwise by start time only (descending in both cases).
  jobs <- allJobsFromReport(gatkReport)

  if ( sortOverall ) {
    plotTitle <- "All jobs, by analysis, by start time"
    rowOrder <- order(jobs$analysisName, jobs$startTime, decreasing=T)
  } else {
    plotTitle <- "All jobs, sorted by start time"
    rowOrder <- order(jobs$startTime, decreasing=T)
  }
  jobs <- jobs[rowOrder, ]
  jobs$index <- 1:nrow(jobs)

  # express all times relative to the first job's start
  firstStart <- min(jobs$startTime)
  jobs$relStartTime <- jobs$startTime - firstStart
  jobs$relDoneTime <- jobs$doneTime - firstStart
  jobs$ganttName <- paste(jobs$jobName, "@", jobs$exechosts)
  lastDone <- max(jobs$relDoneTime)

  # the x axis is extended 10% past the last done time -- presumably to leave
  # room for the text labels placed at the end of each segment
  gantt <- ggplot(data=jobs, aes(x=relStartTime, y=index, color=analysisName)) +
    geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm"))) +
    geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) +
    xlim(0, lastDone * 1.1) +
    xlab(paste("Start time (relative to first job)", RUNTIME_UNITS)) +
    ylab("Job") +
    opts(title=plotTitle)
  print(gantt)
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Plots scheduling efficiency at job events
|
||||||
|
#
|
||||||
|
plotProgressByTime <- function(gatkReport) {
  # Plots, at every job start/done event, how many jobs are pending, running
  # and done.
  #
  # gatkReport: list of report tables, passed to allJobsFromReport().
  #
  # At an event time t a job counts as "pending" when its start is after t,
  # as "done" when its end is before t, and as "running" otherwise.
  allJobs = allJobsFromReport(gatkReport)
  nJobs = dim(allJobs)[1]
  allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
  allJobs$index = 1:nrow(allJobs)

  # express all times relative to the first job's start
  minTime = min(allJobs$startTime)
  allJobs$relStartTime = allJobs$startTime - minTime
  allJobs$relDoneTime = allJobs$doneTime - minTime

  # every start and every done time is an event at which counts are sampled
  times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))

  # Counts, for each event time, the jobs for which predicate p(start, end, time)
  # holds.  vapply replaces the former loop that grew the result one element
  # at a time with c().
  countJobs <- function(p) {
    s = allJobs$relStartTime
    e = allJobs$relDoneTime
    vapply(times, function(time) sum(p(s, e, time)), integer(1), USE.NAMES=FALSE)
  }

  pending = countJobs(function(s, e, t) s > t)
  done = countJobs(function(s, e, t) e < t)
  running = nJobs - pending - done

  d = data.frame(times=times, pending=pending, running=running, done=done)

  # one facet per state, each with its own y scale
  p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
  p <- p + facet_grid(variable ~ ., scales="free")
  p <- p + geom_line(size=2)
  p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
  p <- p + opts(title = "Job scheduling")
  print(p)
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Creates tables for each job in this group
|
||||||
|
#
|
||||||
|
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
|
||||||
|
plotGroup <- function(groupTable) {
  # Renders the summary pages for one report table (one analysis group):
  #   1. a text table with every job and all of its annotation columns,
  #   2. a text table of mean runtime with one column per iteration,
  #   3. a runtime histogram (only when there is exactly one annotation
  #      column and more than one job),
  #   4. a text table of runtime mean and sd over all iterations (only when
  #      there is more than one job).
  #
  # groupTable: data frame for a single group.  Columns not listed in
  #             standardColumns are treated as group annotations; the code
  #             sorts and casts on an "iteration" column, so one is expected.
  analysis <- unique(groupTable$analysisName)[1]
  groupAnnotations <- setdiff(names(groupTable), standardColumns)
  jobRows <- groupTable[, c("jobName", groupAnnotations, "runtime")]
  jobRows <- jobRows[order(jobRows$iteration, jobRows$jobName, decreasing=F), ]
  multipleJobs <- dim(jobRows)[1] > 1

  # create a table showing each job and all annotations
  textplot(jobRows, show.rownames=F)
  title(paste("Job summary for", analysis, "full itemization"), cex=3)

  # create the table for each combination of values in the group, listing iterations in the columns
  perIteration <- cast(melt(jobRows, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
  textplot(as.data.frame(perIteration), show.rownames=F)
  title(paste("Job summary for", analysis, "itemizing each iteration"), cex=3)

  # histogram of job times by groupAnnotations
  if ( length(groupAnnotations) == 1 && multipleJobs ) {
    # todo -- how do we group by annotations?
    histogram <- ggplot(data=jobRows, aes(x=runtime)) + geom_histogram() +
      xlab("runtime in seconds") + ylab("No. of jobs") +
      opts(title=paste("Job runtime histogram for", analysis))
    print(histogram)
  }

  # as above, but averaging over all iterations
  groupAnnotationsNoIteration <- setdiff(groupAnnotations, "iteration")
  if ( multipleJobs ) {
    overall <- cast(melt(jobRows, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
    textplot(as.data.frame(overall), show.rownames=F)
    title(paste("Job summary for", analysis, "averaging over all iterations"), cex=3)
  }
}
|
||||||
|
|
||||||
|
# print out some useful basic information
|
||||||
|
print("Report")
|
||||||
|
print(paste("Project :", inputFileName))
|
||||||
|
|
||||||
|
convertUnits <- function(gatkReportData) {
  # Rescales the runtime, startTime and doneTime columns of every table in
  # the report by ORIGINAL_UNITS_TO_SECONDS.
  #
  # gatkReportData: list of report tables.
  # Returns a list of the same length holding the rescaled tables.
  lapply(gatkReportData, function(group) {
    group$runtime <- group$runtime * ORIGINAL_UNITS_TO_SECONDS
    group$startTime <- group$startTime * ORIGINAL_UNITS_TO_SECONDS
    group$doneTime <- group$doneTime * ORIGINAL_UNITS_TO_SECONDS
    group
  })
}
|
||||||
|
|
||||||
|
|
||||||
|
# read the table
|
||||||
|
gatkReportData <- gsa.read.gatkreport(inputFileName)
|
||||||
|
gatkReportData <- convertUnits(gatkReportData)
|
||||||
|
#print(summary(gatkReportData))
|
||||||
|
|
||||||
|
if ( ! is.na(outputPDF) ) {
|
||||||
|
pdf(outputPDF, height=8.5, width=11)
|
||||||
|
}
|
||||||
|
|
||||||
|
plotJobsGantt(gatkReportData, T)
|
||||||
|
plotJobsGantt(gatkReportData, F)
|
||||||
|
plotProgressByTime(gatkReportData)
|
||||||
|
for ( group in gatkReportData ) {
|
||||||
|
plotGroup(group)
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( ! is.na(outputPDF) ) {
|
||||||
|
dev.off()
|
||||||
|
}
|
||||||
|
|
@ -20,6 +20,20 @@
|
||||||
assign(tableName, d, envir=tableEnv);
|
assign(tableName, d, envir=tableEnv);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Read a fixed width line of text into a list.
|
||||||
|
.gsa.splitFixedWidth <- function(line, columnStarts) {
    # Splits one fixed-width line of text into its fields.
    #
    # line:         the line of text; only the first element is used.
    # columnStarts: 1-based character positions at which the second and later
    #               columns begin (the first column always begins at 1).  May be
    #               empty, in which case the whole line is a single field.
    # Returns a character vector with one whitespace-trimmed entry per column.
    #
    # substring() is called directly rather than through sapply(...)[,1]:
    # with a single column sapply() simplifies to a plain vector and the
    # matrix-style index fails.
    text = line[1];
    starts = c(1, columnStarts);
    stops = c(columnStarts - 1, nchar(text));

    fields = substring(text, starts, stops);
    gsub("^[[:space:]]+|[[:space:]]+$", "", fields);
}
|
||||||
|
|
||||||
# Load all GATKReport tables from a file
|
# Load all GATKReport tables from a file
|
||||||
gsa.read.gatkreport <- function(filename) {
|
gsa.read.gatkreport <- function(filename) {
|
||||||
con = file(filename, "r", blocking = TRUE);
|
con = file(filename, "r", blocking = TRUE);
|
||||||
|
|
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
|
||||||
tableName = NA;
|
tableName = NA;
|
||||||
tableHeader = c();
|
tableHeader = c();
|
||||||
tableRows = c();
|
tableRows = c();
|
||||||
|
version = NA;
|
||||||
|
|
||||||
for (line in lines) {
|
for (line in lines) {
|
||||||
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
|
||||||
headerFields = unlist(strsplit(line, "[[:space:]]+"));
|
headerFields = unlist(strsplit(line, "[[:space:]]+"));
|
||||||
|
|
||||||
if (!is.na(tableName)) {
|
if (!is.na(tableName)) {
|
||||||
|
|
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
|
||||||
tableName = headerFields[2];
|
tableName = headerFields[2];
|
||||||
tableHeader = c();
|
tableHeader = c();
|
||||||
tableRows = c();
|
tableRows = c();
|
||||||
|
|
||||||
|
# For differences in versions see
|
||||||
|
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
|
||||||
|
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||||
|
version = "v0.1";
|
||||||
|
|
||||||
|
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||||
|
version = "v0.2";
|
||||||
|
columnStarts = c();
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
|
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
|
||||||
# do nothing
|
# do nothing
|
||||||
} else if (!is.na(tableName)) {
|
} else if (!is.na(tableName)) {
|
||||||
row = unlist(strsplit(line, "[[:space:]]+"));
|
|
||||||
|
if (version == "v0.1") {
|
||||||
|
row = unlist(strsplit(line, "[[:space:]]+"));
|
||||||
|
|
||||||
|
} else if (version == "v0.2") {
|
||||||
|
if (length(tableHeader) == 0) {
|
||||||
|
headerChars = unlist(strsplit(line, ""));
|
||||||
|
# Find the first position of non space characters, excluding the first character
|
||||||
|
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
row = .gsa.splitFixedWidth(line, columnStarts);
|
||||||
|
}
|
||||||
|
|
||||||
if (length(tableHeader) == 0) {
|
if (length(tableHeader) == 0) {
|
||||||
tableHeader = row;
|
tableHeader = row;
|
||||||
} else {
|
} else {
|
||||||
tableRows = rbind(tableRows, row);
|
tableRows = rbind(tableRows, row);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,6 @@
|
||||||
|
|
||||||
package net.sf.picard.reference;
|
package net.sf.picard.reference;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
||||||
|
|
@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
* Produces fai file with same output as samtools faidx
|
* Produces fai file with same output as samtools faidx
|
||||||
*/
|
*/
|
||||||
public class FastaSequenceIndexBuilder {
|
public class FastaSequenceIndexBuilder {
|
||||||
public File fastaFile;
|
final public File fastaFile;
|
||||||
ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
|
final boolean printProgress;
|
||||||
|
|
||||||
// keep track of location in file
|
// keep track of location in file
|
||||||
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
|
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
|
||||||
|
|
@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
|
||||||
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
|
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
|
||||||
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
|
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
|
||||||
|
|
||||||
public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
|
public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
|
||||||
this.progress = progress;
|
|
||||||
this.fastaFile = fastaFile;
|
this.fastaFile = fastaFile;
|
||||||
fileLength = fastaFile.length();
|
fileLength = fastaFile.length();
|
||||||
|
this.printProgress = printProgress;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
|
||||||
|
|
||||||
if (System.currentTimeMillis() - lastTimestamp > 10000) {
|
if (System.currentTimeMillis() - lastTimestamp > 10000) {
|
||||||
int percentProgress = (int) (100*bytesRead/fileLength);
|
int percentProgress = (int) (100*bytesRead/fileLength);
|
||||||
if (progress != null)
|
if (printProgress)
|
||||||
progress.percentProgress(percentProgress);
|
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
|
||||||
lastTimestamp = System.currentTimeMillis();
|
lastTimestamp = System.currentTimeMillis();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,30 +31,85 @@ import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
||||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Call R scripts to plot residual error versus the various covariates.
|
||||||
* User: rpoplin
|
*
|
||||||
* Date: Dec 1, 2009
|
* <p>
|
||||||
|
* After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
|
||||||
|
* reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
|
||||||
|
* show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
|
||||||
|
* In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run
|
||||||
|
* CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
|
||||||
|
* by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual
|
||||||
|
* error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
|
||||||
|
* bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
|
||||||
|
* each of the quality score estimates. It is defined as follows for N, the number of observations:
|
||||||
|
*
|
||||||
|
* <ul>
|
||||||
|
* <li>light blue means N < 1,000</li>
|
||||||
|
* <li>cornflower blue means 1,000 <= N < 10,000</li>
|
||||||
|
* <li>dark blue means N >= 10,000</li>
|
||||||
|
* <li>The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
|
||||||
|
* meaningless and so aren't included in any of the numerical calculations.</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
|
||||||
|
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
|
||||||
|
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
|
||||||
|
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
|
||||||
|
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
|
||||||
|
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx4g -jar AnalyzeCovariates.jar \
|
||||||
|
* -recalFile /path/to/recal.table.csv \
|
||||||
|
* -outputDir /path/to/output_dir/ \
|
||||||
|
* -resources resources/ \
|
||||||
|
* -ignoreQ 5
|
||||||
|
* </pre>
|
||||||
*
|
*
|
||||||
* Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
@DocumentedGATKFeature(
|
||||||
|
groupName = "AnalyzeCovariates",
|
||||||
|
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
|
||||||
public class AnalyzeCovariates extends CommandLineProgram {
|
public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
|
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
// Command Line Arguments
|
// Command Line Arguments
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
/**
|
||||||
|
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||||
|
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||||
|
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||||
|
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||||
|
*/
|
||||||
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
|
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
|
||||||
private String RECAL_FILE = "output.recal_data.csv";
|
private String RECAL_FILE = "output.recal_data.csv";
|
||||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||||
|
|
@ -65,13 +120,22 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
private String PATH_TO_RESOURCES = "public/R/";
|
private String PATH_TO_RESOURCES = "public/R/";
|
||||||
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
|
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
|
||||||
private int IGNORE_QSCORES_LESS_THAN = 5;
|
private int IGNORE_QSCORES_LESS_THAN = 5;
|
||||||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||||
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
|
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
|
||||||
|
* by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation like dbSNP build 132 or later.
|
||||||
|
*/
|
||||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
|
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
|
||||||
private int MAX_QUALITY_SCORE = 50;
|
private int MAX_QUALITY_SCORE = 50;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This argument is useful when comparing before/after plots whose axes should match each other.
|
||||||
|
*/
|
||||||
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
||||||
private int MAX_HISTOGRAM_VALUE = 0;
|
private int MAX_HISTOGRAM_VALUE = 0;
|
||||||
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
|
||||||
private boolean DO_INDEL_QUALITY = false;
|
private boolean DO_INDEL_QUALITY = false;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -261,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void callRScripts() {
|
private void callRScripts() {
|
||||||
|
RScriptExecutor.RScriptArgumentCollection argumentCollection =
|
||||||
|
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
|
||||||
|
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
|
||||||
|
|
||||||
int numReadGroups = 0;
|
int numReadGroups = 0;
|
||||||
|
|
||||||
// for each read group
|
// for each read group
|
||||||
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
|
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
|
||||||
|
|
||||||
Process p;
|
|
||||||
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
|
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
|
||||||
|
|
||||||
String readGroup = readGroupKey.toString();
|
String readGroup = readGroupKey.toString();
|
||||||
|
|
@ -276,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
||||||
// for each covariate
|
// for each covariate
|
||||||
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
|
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
|
||||||
Covariate cov = requestedCovariates.get(iii);
|
Covariate cov = requestedCovariates.get(iii);
|
||||||
try {
|
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
|
||||||
|
if (DO_INDEL_QUALITY) {
|
||||||
if (DO_INDEL_QUALITY) {
|
executor.callRScripts("plot_indelQuality.R", outputFilename,
|
||||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " +
|
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
} else {
|
||||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
|
||||||
p.waitFor();
|
|
||||||
|
|
||||||
} else {
|
|
||||||
if( iii == 1 ) {
|
if( iii == 1 ) {
|
||||||
// Analyze reported quality
|
// Analyze reported quality
|
||||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
|
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
|
||||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||||
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
} else { // Analyze all other covariates
|
||||||
p.waitFor();
|
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
|
||||||
} else { // Analyze all other covariates
|
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
|
|
||||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
|
||||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
|
||||||
p.waitFor();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} catch (InterruptedException e) {
|
|
||||||
e.printStackTrace();
|
|
||||||
System.exit(-1);
|
|
||||||
} catch (IOException e) {
|
|
||||||
System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option.");
|
|
||||||
e.printStackTrace();
|
|
||||||
System.exit(-1);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else { // at the maximum number of read groups so break out
|
} else { // at the maximum number of read groups so break out
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,4 @@
|
||||||
|
/**
|
||||||
|
* Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
|
||||||
|
*/
|
||||||
|
package org.broadinstitute.sting.analyzecovariates;
|
||||||
|
|
@ -0,0 +1,40 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import java.lang.annotation.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Indicates that a walker argument should be considered an advanced option.
|
||||||
|
*
|
||||||
|
* @author Mark DePristo
|
||||||
|
* @version 0.1
|
||||||
|
*/
|
||||||
|
@Documented
|
||||||
|
@Inherited
|
||||||
|
@Retention(RetentionPolicy.RUNTIME)
|
||||||
|
@Target({ElementType.TYPE,ElementType.FIELD})
|
||||||
|
public @interface Advanced {
|
||||||
|
}
|
||||||
|
|
@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
|
||||||
|
|
||||||
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
|
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
|
||||||
public boolean matches( ArgumentDefinition definition, Object key ) {
|
public boolean matches( ArgumentDefinition definition, Object key ) {
|
||||||
return definition.validation != null;
|
// We can perform some sort of validation for anything that isn't a flag.
|
||||||
|
return !definition.isFlag;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
||||||
public final String label;
|
public final String label;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Maps indicies of command line arguments to values paired with that argument.
|
* Maps indices of command line arguments to values paired with that argument.
|
||||||
*/
|
*/
|
||||||
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -151,6 +151,14 @@ public class ArgumentSource {
|
||||||
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
|
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Is the given argument considered an advanced option when displayed by the command-line argument system?
|
||||||
|
* @return True if so. False otherwise.
|
||||||
|
*/
|
||||||
|
public boolean isAdvanced() {
|
||||||
|
return field.isAnnotationPresent(Advanced.class);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this command-line argument dependent on some primitive argument types?
|
* Is this command-line argument dependent on some primitive argument types?
|
||||||
* @return True if this command-line argument depends on other arguments; false otherwise.
|
* @return True if this command-line argument depends on other arguments; false otherwise.
|
||||||
|
|
@ -175,13 +183,17 @@ public class ArgumentSource {
|
||||||
return typeDescriptor.createsTypeDefault(this);
|
return typeDescriptor.createsTypeDefault(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String typeDefaultDocString() {
|
||||||
|
return typeDescriptor.typeDefaultDocString(this);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates a default for the given type.
|
* Generates a default for the given type.
|
||||||
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
||||||
* @return A default value for the given type.
|
* @return A default value for the given type.
|
||||||
*/
|
*/
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine) {
|
public Object createTypeDefault(ParsingEngine parsingEngine) {
|
||||||
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType());
|
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType());
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,8 @@
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
||||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
|
|
@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
import java.lang.annotation.Annotation;
|
import java.lang.annotation.Annotation;
|
||||||
import java.lang.reflect.*;
|
import java.lang.reflect.*;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -80,14 +83,26 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
*/
|
*/
|
||||||
public boolean createsTypeDefault(ArgumentSource source) { return false; }
|
public boolean createsTypeDefault(ArgumentSource source) { return false; }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a documentation-friendly value for the default of a type descriptor.
|
||||||
|
* Must be overridden if createsTypeDefault returns true; cannot be called otherwise.
|
||||||
|
* @param source Source of the command-line argument.
|
||||||
|
* @return Friendly string of the default value, for documentation. If this descriptor doesn't create a default, throws
|
||||||
|
* an UnsupportedOperationException
|
||||||
|
*/
|
||||||
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates a default for the given type.
|
* Generates a default for the given type.
|
||||||
|
*
|
||||||
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
||||||
* @param source Source of the command-line argument.
|
* @param source Source of the command-line argument.
|
||||||
* @param type Type of value to create, in case the command-line argument system wants influence.
|
* @param type Type of value to create, in case the command-line argument system wants influence.
|
||||||
* @return A default value for the given type.
|
* @return A default value for the given type.
|
||||||
*/
|
*/
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
|
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
|
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
|
||||||
|
|
@ -109,7 +124,7 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
* @return The parsed object.
|
* @return The parsed object.
|
||||||
*/
|
*/
|
||||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
|
||||||
return parse(parsingEngine, source, source.field.getType(), matches);
|
return parse(parsingEngine, source, source.field.getGenericType(), matches);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -131,18 +146,18 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
|
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
|
||||||
Annotation argumentAnnotation = getArgumentAnnotation(source);
|
Annotation argumentAnnotation = getArgumentAnnotation(source);
|
||||||
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
|
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
|
||||||
source.field.getType(),
|
source.field.getType(),
|
||||||
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
||||||
ArgumentDefinition.getShortName(argumentAnnotation),
|
ArgumentDefinition.getShortName(argumentAnnotation),
|
||||||
ArgumentDefinition.getDoc(argumentAnnotation),
|
ArgumentDefinition.getDoc(argumentAnnotation),
|
||||||
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
||||||
source.isFlag(),
|
source.isFlag(),
|
||||||
source.isMultiValued(),
|
source.isMultiValued(),
|
||||||
source.isHidden(),
|
source.isHidden(),
|
||||||
getCollectionComponentType(source.field),
|
makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
|
||||||
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
||||||
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
||||||
getValidOptions(source) );
|
getValidOptions(source) );
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -151,7 +166,7 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
* @return The parameterized component type, or String.class if the parameterized type could not be found.
|
* @return The parameterized component type, or String.class if the parameterized type could not be found.
|
||||||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||||
*/
|
*/
|
||||||
protected Class getCollectionComponentType( Field field ) {
|
protected Type getCollectionComponentType( Field field ) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -162,7 +177,7 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
|
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
|
||||||
* @return The individual parsed object matching the argument match with Class type.
|
* @return The individual parsed object matching the argument match with Class type.
|
||||||
*/
|
*/
|
||||||
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
|
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If the argument source only accepts a small set of options, populate the returned list with
|
* If the argument source only accepts a small set of options, populate the returned list with
|
||||||
|
|
@ -273,6 +288,123 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
public static boolean isArgumentHidden(Field field) {
|
public static boolean isArgumentHidden(Field field) {
|
||||||
return field.isAnnotationPresent(Hidden.class);
|
return field.isAnnotationPresent(Hidden.class);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Class makeRawTypeIfNecessary(Type t) {
|
||||||
|
if ( t == null )
|
||||||
|
return null;
|
||||||
|
else if ( t instanceof ParameterizedType )
|
||||||
|
return (Class)((ParameterizedType) t).getRawType();
|
||||||
|
else if ( t instanceof Class ) {
|
||||||
|
return (Class)t;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parser for RodBinding objects
|
||||||
|
*/
|
||||||
|
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
/**
|
||||||
|
* We only want RodBinding class objects
|
||||||
|
* @param type The type to check.
|
||||||
|
* @return true if the provided class is a RodBinding.class
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean supports( Class type ) {
|
||||||
|
return isRodBinding(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isRodBinding( Class type ) {
|
||||||
|
return RodBinding.class.isAssignableFrom(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
|
||||||
|
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||||
|
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
return "none";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||||
|
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||||
|
String value = getArgumentValue( defaultDefinition, matches );
|
||||||
|
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||||
|
|
||||||
|
try {
|
||||||
|
String name = defaultDefinition.fullName;
|
||||||
|
String tribbleType = null;
|
||||||
|
Tags tags = getArgumentTags(matches);
|
||||||
|
// at most two positional tag values are allowed here (zero, one, or two)
|
||||||
|
if ( tags.getPositionalTags().size() > 2 ) {
|
||||||
|
throw new UserException.CommandLineException(
|
||||||
|
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||||
|
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
||||||
|
value, source.field.getName()));
|
||||||
|
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||||
|
// -X:name,type style
|
||||||
|
name = tags.getPositionalTags().get(0);
|
||||||
|
tribbleType = tags.getPositionalTags().get(1);
|
||||||
|
} else {
|
||||||
|
// case with 0 or 1 positional tags
|
||||||
|
FeatureManager manager = new FeatureManager();
|
||||||
|
|
||||||
|
// -X:type style is a type when we cannot determine the type dynamically
|
||||||
|
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
|
||||||
|
if ( tag1 != null ) {
|
||||||
|
if ( manager.getByName(tag1) != null ) // this a type
|
||||||
|
tribbleType = tag1;
|
||||||
|
else
|
||||||
|
name = tag1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( tribbleType == null ) {
|
||||||
|
// try to determine the file type dynamically
|
||||||
|
File file = new File(value);
|
||||||
|
if ( file.canRead() && file.isFile() ) {
|
||||||
|
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
|
||||||
|
if ( featureDescriptor != null ) {
|
||||||
|
tribbleType = featureDescriptor.getName();
|
||||||
|
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( tribbleType == null )
|
||||||
|
if ( ! file.canRead() | ! file.isFile() ) {
|
||||||
|
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
||||||
|
} else {
|
||||||
|
throw new UserException.CommandLineException(
|
||||||
|
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
|
||||||
|
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||||
|
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||||
|
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||||
|
parsingEngine.addTags(result,tags);
|
||||||
|
parsingEngine.addRodBinding(result);
|
||||||
|
return result;
|
||||||
|
} catch (InvocationTargetException e) {
|
||||||
|
throw new UserException.CommandLineException(
|
||||||
|
String.format("Failed to parse value %s for argument %s.",
|
||||||
|
value, source.field.getName()));
|
||||||
|
} catch (Exception e) {
|
||||||
|
throw new UserException.CommandLineException(
|
||||||
|
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||||
|
value, source.field.getName(), e.getMessage()));
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -282,9 +414,10 @@ public abstract class ArgumentTypeDescriptor {
|
||||||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
@Override
|
@Override
|
||||||
public boolean supports( Class type ) {
|
public boolean supports( Class type ) {
|
||||||
if( type.isPrimitive() ) return true;
|
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
||||||
if( type.isEnum() ) return true;
|
if ( type.isPrimitive() ) return true;
|
||||||
if( primitiveToWrapperMap.containsValue(type) ) return true;
|
if ( type.isEnum() ) return true;
|
||||||
|
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
type.getConstructor(String.class);
|
type.getConstructor(String.class);
|
||||||
|
|
@ -298,7 +431,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||||
|
Class type = makeRawTypeIfNecessary(fulltype);
|
||||||
if (source.isFlag())
|
if (source.isFlag())
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
|
|
@ -339,7 +473,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
throw e;
|
throw e;
|
||||||
} catch (InvocationTargetException e) {
|
} catch (InvocationTargetException e) {
|
||||||
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
|
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
|
||||||
value, source.field.getName()));
|
value, source.field.getName()));
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new DynamicClassResolutionException(String.class, e);
|
throw new DynamicClassResolutionException(String.class, e);
|
||||||
}
|
}
|
||||||
|
|
@ -351,7 +485,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
|
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
|
||||||
|
|
@ -382,10 +516,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
|
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||||
Class componentType;
|
Class type = makeRawTypeIfNecessary(fulltype);
|
||||||
|
Type componentType;
|
||||||
Object result;
|
Object result;
|
||||||
Tags tags;
|
|
||||||
|
|
||||||
if( Collection.class.isAssignableFrom(type) ) {
|
if( Collection.class.isAssignableFrom(type) ) {
|
||||||
|
|
||||||
|
|
@ -399,7 +533,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
componentType = getCollectionComponentType( source.field );
|
componentType = getCollectionComponentType( source.field );
|
||||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||||
|
|
||||||
Collection collection;
|
Collection collection;
|
||||||
try {
|
try {
|
||||||
|
|
@ -428,7 +562,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
else if( type.isArray() ) {
|
else if( type.isArray() ) {
|
||||||
componentType = type.getComponentType();
|
componentType = type.getComponentType();
|
||||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||||
|
|
||||||
// Assemble a collection of individual values used in this computation.
|
// Assemble a collection of individual values used in this computation.
|
||||||
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
|
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
|
||||||
|
|
@ -436,7 +570,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
for( ArgumentMatch value: match )
|
for( ArgumentMatch value: match )
|
||||||
values.add(value);
|
values.add(value);
|
||||||
|
|
||||||
result = Array.newInstance(componentType,values.size());
|
result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
|
||||||
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for( ArgumentMatch value: values ) {
|
for( ArgumentMatch value: values ) {
|
||||||
|
|
@ -459,16 +593,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected Class getCollectionComponentType( Field field ) {
|
protected Type getCollectionComponentType( Field field ) {
|
||||||
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
||||||
if( field.getGenericType() instanceof ParameterizedType) {
|
if( field.getGenericType() instanceof ParameterizedType) {
|
||||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||||
return (Class)parameterizedType.getActualTypeArguments()[0];
|
return parameterizedType.getActualTypeArguments()[0];
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
return String.class;
|
return String.class;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -510,12 +644,12 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||||
if(multiplexer == null || multiplexedIds == null)
|
if(multiplexer == null || multiplexedIds == null)
|
||||||
throw new ReviewedStingException("No multiplexed ids available");
|
throw new ReviewedStingException("No multiplexed ids available");
|
||||||
|
|
||||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||||
Class componentType = getCollectionComponentType(source.field);
|
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||||
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
|
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||||
|
|
||||||
for(Object id: multiplexedIds) {
|
for(Object id: multiplexedIds) {
|
||||||
|
|
@ -527,15 +661,19 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
return multiplexedMapping;
|
return multiplexedMapping;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
return "None";
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||||
if(multiplexedIds == null)
|
if(multiplexedIds == null)
|
||||||
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
|
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
|
||||||
|
|
||||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||||
|
|
||||||
Class componentType = getCollectionComponentType(source.field);
|
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||||
|
|
||||||
|
|
||||||
for(Object id: multiplexedIds) {
|
for(Object id: multiplexedIds) {
|
||||||
|
|
@ -606,7 +744,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
protected Class getCollectionComponentType( Field field ) {
|
protected Type getCollectionComponentType( Field field ) {
|
||||||
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
|
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
|
||||||
if( field.getGenericType() instanceof ParameterizedType) {
|
if( field.getGenericType() instanceof ParameterizedType) {
|
||||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ import java.util.Locale;
|
||||||
public abstract class CommandLineProgram {
|
public abstract class CommandLineProgram {
|
||||||
|
|
||||||
/** The command-line program and the arguments it returned. */
|
/** The command-line program and the arguments it returned. */
|
||||||
protected ParsingEngine parser = null;
|
public ParsingEngine parser = null;
|
||||||
|
|
||||||
/** the default log level */
|
/** the default log level */
|
||||||
@Argument(fullName = "logging_level",
|
@Argument(fullName = "logging_level",
|
||||||
|
|
@ -144,6 +144,11 @@ public abstract class CommandLineProgram {
|
||||||
|
|
||||||
public static int result = -1;
|
public static int result = -1;
|
||||||
|
|
||||||
|
@SuppressWarnings("unchecked")
|
||||||
|
public static void start(CommandLineProgram clp, String[] args) throws Exception {
|
||||||
|
start(clp, args, false);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function is called to start processing the command line, and kick
|
* This function is called to start processing the command line, and kick
|
||||||
* off the execute message of the program.
|
* off the execute message of the program.
|
||||||
|
|
@ -153,7 +158,7 @@ public abstract class CommandLineProgram {
|
||||||
* @throws Exception when an exception occurs
|
* @throws Exception when an exception occurs
|
||||||
*/
|
*/
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
public static void start(CommandLineProgram clp, String[] args) throws Exception {
|
public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// setup our log layout
|
// setup our log layout
|
||||||
|
|
@ -180,8 +185,9 @@ public abstract class CommandLineProgram {
|
||||||
// - InvalidArgument in case these arguments are specified by plugins.
|
// - InvalidArgument in case these arguments are specified by plugins.
|
||||||
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've
|
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've
|
||||||
// determined the full complement of arguments.
|
// determined the full complement of arguments.
|
||||||
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
|
if ( ! dryRun )
|
||||||
ParsingEngine.ValidationType.InvalidArgument));
|
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
|
||||||
|
ParsingEngine.ValidationType.InvalidArgument));
|
||||||
parser.loadArgumentsIntoObject(clp);
|
parser.loadArgumentsIntoObject(clp);
|
||||||
|
|
||||||
// Initialize the logger using the loaded command line.
|
// Initialize the logger using the loaded command line.
|
||||||
|
|
@ -195,36 +201,40 @@ public abstract class CommandLineProgram {
|
||||||
if (isHelpPresent(parser))
|
if (isHelpPresent(parser))
|
||||||
printHelpAndExit(clp, parser);
|
printHelpAndExit(clp, parser);
|
||||||
|
|
||||||
parser.validate();
|
if ( ! dryRun ) parser.validate();
|
||||||
} else {
|
} else {
|
||||||
parser.parse(args);
|
parser.parse(args);
|
||||||
|
|
||||||
if (isHelpPresent(parser))
|
if ( ! dryRun ) {
|
||||||
printHelpAndExit(clp, parser);
|
if (isHelpPresent(parser))
|
||||||
|
printHelpAndExit(clp, parser);
|
||||||
|
|
||||||
parser.validate();
|
parser.validate();
|
||||||
|
}
|
||||||
parser.loadArgumentsIntoObject(clp);
|
parser.loadArgumentsIntoObject(clp);
|
||||||
|
|
||||||
// Initialize the logger using the loaded command line.
|
// Initialize the logger using the loaded command line.
|
||||||
clp.setupLoggerLevel(layout);
|
clp.setupLoggerLevel(layout);
|
||||||
}
|
}
|
||||||
|
|
||||||
// if they specify a log location, output our data there
|
if ( ! dryRun ) {
|
||||||
if (clp.toFile != null) {
|
// if they specify a log location, output our data there
|
||||||
FileAppender appender;
|
if (clp.toFile != null) {
|
||||||
try {
|
FileAppender appender;
|
||||||
appender = new FileAppender(layout, clp.toFile, false);
|
try {
|
||||||
logger.addAppender(appender);
|
appender = new FileAppender(layout, clp.toFile, false);
|
||||||
} catch (IOException e) {
|
logger.addAppender(appender);
|
||||||
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
|
} catch (IOException e) {
|
||||||
|
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// regardless of what happens next, generate the header information
|
||||||
|
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
|
||||||
|
|
||||||
|
// call the execute
|
||||||
|
CommandLineProgram.result = clp.execute();
|
||||||
}
|
}
|
||||||
|
|
||||||
// regardless of what happens next, generate the header information
|
|
||||||
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
|
|
||||||
|
|
||||||
// call the execute
|
|
||||||
CommandLineProgram.result = clp.execute();
|
|
||||||
}
|
}
|
||||||
catch (ArgumentException e) {
|
catch (ArgumentException e) {
|
||||||
clp.parser.printHelp(clp.getApplicationDetails());
|
clp.parser.printHelp(clp.getApplicationDetails());
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ public @interface Output {
|
||||||
* --help argument is specified.
|
* --help argument is specified.
|
||||||
* @return Doc string associated with this command-line argument.
|
* @return Doc string associated with this command-line argument.
|
||||||
*/
|
*/
|
||||||
String doc() default "An output file presented to the walker. Will overwrite contents if file exists.";
|
String doc() default "An output file created by the walker. Will overwrite contents if file exists";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is this argument required. If true, the command-line argument system will
|
* Is this argument required. If true, the command-line argument system will
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.commandline;
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
|
|
@ -41,11 +42,16 @@ import java.util.*;
|
||||||
* A parser for Sting command-line arguments.
|
* A parser for Sting command-line arguments.
|
||||||
*/
|
*/
|
||||||
public class ParsingEngine {
|
public class ParsingEngine {
|
||||||
|
/**
|
||||||
|
* The loaded argument sources along with their back definitions.
|
||||||
|
*/
|
||||||
|
private Map<ArgumentDefinition,ArgumentSource> argumentSourcesByDefinition = new HashMap<ArgumentDefinition,ArgumentSource>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A list of defined arguments against which command lines are matched.
|
* A list of defined arguments against which command lines are matched.
|
||||||
* Package protected for testing access.
|
* Package protected for testing access.
|
||||||
*/
|
*/
|
||||||
ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
|
public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A list of matches from defined arguments to command-line text.
|
* A list of matches from defined arguments to command-line text.
|
||||||
|
|
@ -59,11 +65,17 @@ public class ParsingEngine {
|
||||||
*/
|
*/
|
||||||
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
|
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* All of the RodBinding objects we've seen while parsing
|
||||||
|
*/
|
||||||
|
private List<RodBinding> rodBindings = new ArrayList<RodBinding>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class reference to the different types of descriptors that the create method can create.
|
* Class reference to the different types of descriptors that the create method can create.
|
||||||
* The type of set used must be ordered (but not necessarily sorted).
|
* The type of set used must be ordered (but not necessarily sorted).
|
||||||
*/
|
*/
|
||||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||||
|
new RodBindingArgumentTypeDescriptor(),
|
||||||
new CompoundArgumentTypeDescriptor(),
|
new CompoundArgumentTypeDescriptor(),
|
||||||
new MultiplexArgumentTypeDescriptor()) );
|
new MultiplexArgumentTypeDescriptor()) );
|
||||||
|
|
||||||
|
|
@ -80,6 +92,7 @@ public class ParsingEngine {
|
||||||
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
|
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
|
||||||
|
|
||||||
public ParsingEngine( CommandLineProgram clp ) {
|
public ParsingEngine( CommandLineProgram clp ) {
|
||||||
|
RodBinding.resetNameCounter();
|
||||||
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
|
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
|
||||||
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
|
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
|
||||||
|
|
||||||
|
|
@ -107,8 +120,13 @@ public class ParsingEngine {
|
||||||
*/
|
*/
|
||||||
public void addArgumentSource( String sourceName, Class sourceClass ) {
|
public void addArgumentSource( String sourceName, Class sourceClass ) {
|
||||||
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
|
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
|
||||||
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
|
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
|
||||||
argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
|
List<ArgumentDefinition> argumentDefinitions = argumentSource.createArgumentDefinitions();
|
||||||
|
for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
|
||||||
|
argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
|
||||||
|
argumentsFromSource.add( argumentDefinition );
|
||||||
|
}
|
||||||
|
}
|
||||||
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
|
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -199,16 +217,25 @@ public class ParsingEngine {
|
||||||
throw new InvalidArgumentException( invalidArguments );
|
throw new InvalidArgumentException( invalidArguments );
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find invalid argument values (arguments that fail the regexp test.
|
// Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
|
||||||
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
|
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
|
||||||
Collection<ArgumentDefinition> verifiableArguments =
|
Collection<ArgumentDefinition> verifiableArguments =
|
||||||
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
|
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
|
||||||
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
|
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
|
||||||
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
|
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
|
||||||
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
|
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
|
||||||
|
// Check to see whether an argument value was specified. Argument values must be provided
|
||||||
|
// when the argument name is specified and the argument is not a flag type.
|
||||||
|
for(ArgumentMatch verifiableMatch: verifiableMatches) {
|
||||||
|
ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
|
||||||
|
if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
|
||||||
|
invalidValues.add(new Pair<ArgumentDefinition,String>(verifiableArgument,null));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure that the field contents meet the validation criteria specified by the regular expression.
|
||||||
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
|
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
|
||||||
for( String value: verifiableMatch.values() ) {
|
for( String value: verifiableMatch.values() ) {
|
||||||
if( !value.matches(verifiableArgument.validation) )
|
if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
|
||||||
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
|
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -304,7 +331,17 @@ public class ParsingEngine {
|
||||||
if(!tags.containsKey(key))
|
if(!tags.containsKey(key))
|
||||||
return new Tags();
|
return new Tags();
|
||||||
return tags.get(key);
|
return tags.get(key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Add a RodBinding type argument to this parser. Called during parsing to allow
|
||||||
|
* us to track all of the RodBindings discovered in the command line.
|
||||||
|
* @param rodBinding the rodbinding to add. Must not be added twice
|
||||||
|
*/
|
||||||
|
@Requires("rodBinding != null")
|
||||||
|
public void addRodBinding(final RodBinding rodBinding) {
|
||||||
|
rodBindings.add(rodBinding);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Notify the user that a deprecated command-line argument has been used.
|
* Notify the user that a deprecated command-line argument has been used.
|
||||||
|
|
@ -327,7 +364,7 @@ public class ParsingEngine {
|
||||||
*/
|
*/
|
||||||
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
|
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
|
||||||
// Nothing to load
|
// Nothing to load
|
||||||
if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired()))
|
if( argumentMatches.size() == 0 && ! source.createsTypeDefault() )
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// Target instance into which to inject the value.
|
// Target instance into which to inject the value.
|
||||||
|
|
@ -344,6 +381,10 @@ public class ParsingEngine {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<RodBinding> getRodBindings() {
|
||||||
|
return Collections.unmodifiableCollection(rodBindings);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Gets a collection of the container instances of the given type stored within the given target.
|
* Gets a collection of the container instances of the given type stored within the given target.
|
||||||
* @param source Argument source.
|
* @param source Argument source.
|
||||||
|
|
@ -390,7 +431,6 @@ public class ParsingEngine {
|
||||||
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
|
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
|
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
|
||||||
// now simply call into the truly general routine extract argument bindings but with a null
|
// now simply call into the truly general routine extract argument bindings but with a null
|
||||||
// object so bindings aren't computed
|
// object so bindings aren't computed
|
||||||
|
|
@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
|
||||||
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
|
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
|
||||||
StringBuilder sb = new StringBuilder();
|
StringBuilder sb = new StringBuilder();
|
||||||
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
|
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
|
||||||
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
if(invalidValue.getSecond() == null)
|
||||||
invalidValue.first.fullName,
|
sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
|
||||||
invalidValue.second,
|
invalidValue.first.fullName) );
|
||||||
invalidValue.first.validation) );
|
else
|
||||||
|
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
||||||
|
invalidValue.first.fullName,
|
||||||
|
invalidValue.second,
|
||||||
|
invalidValue.first.validation) );
|
||||||
}
|
}
|
||||||
return sb.toString();
|
return sb.toString();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,187 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.commandline;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A RodBinding representing a walker argument that gets bound to a ROD track.
|
||||||
|
*
|
||||||
|
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
|
||||||
|
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
|
||||||
|
* is explicitly typed with type of the Tribble.Feature expected to be produced by this
|
||||||
|
* argument. The GATK Engine takes care of initializing the binding and connecting it
|
||||||
|
* to the RMD system.
|
||||||
|
*
|
||||||
|
* It is recommended that optional RodBindings be initialized to the value returned
|
||||||
|
* by the static method makeUnbound().
|
||||||
|
*
|
||||||
|
* Note that this class is immutable.
|
||||||
|
*/
|
||||||
|
public final class RodBinding<T extends Feature> {
|
||||||
|
protected final static String UNBOUND_VARIABLE_NAME = "";
|
||||||
|
protected final static String UNBOUND_SOURCE = "UNBOUND";
|
||||||
|
protected final static String UNBOUND_TRIBBLE_TYPE = "";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create an unbound Rodbinding of type. This is the correct programming
|
||||||
|
* style for an optional RodBinding<T>
|
||||||
|
*
|
||||||
|
* At Input()
|
||||||
|
* RodBinding<T> x = RodBinding.makeUnbound(T.class)
|
||||||
|
*
|
||||||
|
* The unbound binding is guaranteed to never match any binding. It uniquely
|
||||||
|
* returns false to isBound().
|
||||||
|
*
|
||||||
|
* @param type the Class type produced by this unbound object
|
||||||
|
* @param <T> any class extending Tribble Feature
|
||||||
|
* @return the UNBOUND RodBinding producing objects of type T
|
||||||
|
*/
|
||||||
|
@Requires("type != null")
|
||||||
|
protected final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
|
||||||
|
return new RodBinding<T>(type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */
|
||||||
|
final private String name;
|
||||||
|
/** where the data for this ROD is coming from. A file or special value if coming from stdin */
|
||||||
|
final private String source;
|
||||||
|
/** the string name of the tribble type, such as vcf, bed, etc. */
|
||||||
|
final private String tribbleType;
|
||||||
|
/** The command line tags associated with this RodBinding */
|
||||||
|
final private Tags tags;
|
||||||
|
/** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */
|
||||||
|
final private Class<T> type;
|
||||||
|
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
|
||||||
|
final private boolean bound;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The name counter. This is how we create unique names for collections of RodBindings
|
||||||
|
* on the command line. If you provide the GATK with -X file1 and -X file2 to a
|
||||||
|
* RodBinding argument as List<RodBinding<T>> then each binding will receive automatically
|
||||||
|
* the name of X and X2.
|
||||||
|
*/
|
||||||
|
final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
|
||||||
|
|
||||||
|
/** for UnitTests */
|
||||||
|
final public static void resetNameCounter() {
|
||||||
|
nameCounter.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("rawName != null")
|
||||||
|
@Ensures("result != null")
|
||||||
|
final private static synchronized String countedVariableName(final String rawName) {
|
||||||
|
Integer count = nameCounter.get(rawName);
|
||||||
|
if ( count == null ) {
|
||||||
|
nameCounter.put(rawName, 1);
|
||||||
|
return rawName;
|
||||||
|
} else {
|
||||||
|
nameCounter.put(rawName, count + 1);
|
||||||
|
return rawName + (count + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||||
|
public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||||
|
this.type = type;
|
||||||
|
this.name = countedVariableName(rawName);
|
||||||
|
this.source = source;
|
||||||
|
this.tribbleType = tribbleType;
|
||||||
|
this.tags = tags;
|
||||||
|
this.bound = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
|
||||||
|
* @param type class this unbound RodBinding creates
|
||||||
|
*/
|
||||||
|
@Requires({"type != null"})
|
||||||
|
private RodBinding(Class<T> type) {
|
||||||
|
this.type = type;
|
||||||
|
this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker
|
||||||
|
this.source = UNBOUND_SOURCE;
|
||||||
|
this.tribbleType = UNBOUND_TRIBBLE_TYPE;
|
||||||
|
this.tags = new Tags();
|
||||||
|
this.bound = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
|
||||||
|
*/
|
||||||
|
final public boolean isBound() {
|
||||||
|
return bound;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
final public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
final public Class<T> getType() {
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return where the data for this ROD is coming from. A file or special value if coming from stdin
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
final public String getSource() {
|
||||||
|
return source;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return The command line tags associated with this RodBinding. Will include the tags used to
|
||||||
|
* determine the name and type of this RodBinding
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
final public Tags getTags() {
|
||||||
|
return tags;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return the string name of the tribble type, such as vcf, bed, etc.
|
||||||
|
*/
|
||||||
|
@Ensures({"result != null"})
|
||||||
|
final public String getTribbleType() {
|
||||||
|
return tribbleType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -25,21 +25,20 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk;
|
package org.broadinstitute.sting.gatk;
|
||||||
|
|
||||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
import org.broadinstitute.sting.commandline.*;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
|
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.*;
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* @author aaron
|
||||||
|
|
@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
*/
|
*/
|
||||||
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
||||||
|
|
||||||
|
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* this is the function that the inheriting class can expect to have called
|
* this is the function that the inheriting class can expect to have called
|
||||||
* when the command line system has initialized.
|
* when the command line system has initialized.
|
||||||
|
|
@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
|
|
||||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||||
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
|
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
|
||||||
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
|
|
||||||
|
|
||||||
engine.setWalker(walker);
|
engine.setWalker(walker);
|
||||||
walker.setToolkit(engine);
|
walker.setToolkit(engine);
|
||||||
|
|
@ -96,6 +96,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
loadArgumentsIntoObject(walker);
|
loadArgumentsIntoObject(walker);
|
||||||
argumentSources.add(walker);
|
argumentSources.add(walker);
|
||||||
|
|
||||||
|
Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
|
||||||
|
|
||||||
|
// todo: remove me when the old style system is removed
|
||||||
|
if ( getArgumentCollection().RODBindings.size() > 0 ) {
|
||||||
|
logger.warn("################################################################################");
|
||||||
|
logger.warn("################################################################################");
|
||||||
|
logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2.");
|
||||||
|
logger.warn("Please use arguments defined by each specific walker instead.");
|
||||||
|
for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
|
||||||
|
logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
|
||||||
|
}
|
||||||
|
logger.warn("################################################################################");
|
||||||
|
logger.warn("################################################################################");
|
||||||
|
System.exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
engine.setReferenceMetaDataFiles(rodBindings);
|
||||||
|
|
||||||
for (ReadFilter filter: filters) {
|
for (ReadFilter filter: filters) {
|
||||||
loadArgumentsIntoObject(filter);
|
loadArgumentsIntoObject(filter);
|
||||||
argumentSources.add(filter);
|
argumentSources.add(filter);
|
||||||
|
|
@ -112,6 +130,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
||||||
* This report will be written to either STDOUT or to the run repository, depending on the options
|
* This report will be written to either STDOUT or to the run repository, depending on the options
|
||||||
|
|
@ -142,7 +161,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
||||||
*/
|
*/
|
||||||
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
||||||
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
|
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
|
||||||
new SAMFileReaderArgumentTypeDescriptor(engine),
|
|
||||||
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
|
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
|
||||||
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
|
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
||||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
import org.broadinstitute.sting.utils.help.*;
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @author aaron
|
* The GATK engine itself. Manages map/reduce data access and runs walkers.
|
||||||
* @version 1.0
|
*
|
||||||
* @date May 8, 2009
|
|
||||||
* <p/>
|
|
||||||
* Class CommandLineGATK
|
|
||||||
* <p/>
|
|
||||||
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
|
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
|
||||||
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
|
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
|
||||||
* the gatk engine should deal with any data related information.
|
* the gatk engine should deal with any data related information.
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature(
|
||||||
|
groupName = "GATK Engine",
|
||||||
|
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
|
||||||
|
extraDocs = { UserException.class })
|
||||||
public class CommandLineGATK extends CommandLineExecutable {
|
public class CommandLineGATK extends CommandLineExecutable {
|
||||||
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
|
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
|
||||||
private String analysisName = null;
|
private String analysisName = null;
|
||||||
|
|
@ -173,12 +175,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
||||||
StringBuilder additionalHelp = new StringBuilder();
|
StringBuilder additionalHelp = new StringBuilder();
|
||||||
Formatter formatter = new Formatter(additionalHelp);
|
Formatter formatter = new Formatter(additionalHelp);
|
||||||
|
|
||||||
formatter.format("Description:%n");
|
formatter.format("Available Reference Ordered Data types:%n");
|
||||||
|
formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
|
||||||
|
formatter.format("%n");
|
||||||
|
|
||||||
WalkerManager walkerManager = engine.getWalkerManager();
|
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
|
||||||
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
|
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
|
||||||
|
|
||||||
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
|
|
||||||
|
|
||||||
return additionalHelp.toString();
|
return additionalHelp.toString();
|
||||||
}
|
}
|
||||||
|
|
@ -192,8 +194,6 @@ public class CommandLineGATK extends CommandLineExecutable {
|
||||||
StringBuilder additionalHelp = new StringBuilder();
|
StringBuilder additionalHelp = new StringBuilder();
|
||||||
Formatter formatter = new Formatter(additionalHelp);
|
Formatter formatter = new Formatter(additionalHelp);
|
||||||
|
|
||||||
formatter.format("Available analyses:%n");
|
|
||||||
|
|
||||||
// Get the list of walker names from the walker manager.
|
// Get the list of walker names from the walker manager.
|
||||||
WalkerManager walkerManager = engine.getWalkerManager();
|
WalkerManager walkerManager = engine.getWalkerManager();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
|
|
@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
|
||||||
throw new ArgumentException("Walker does not allow a reference but one was provided.");
|
throw new ArgumentException("Walker does not allow a reference but one was provided.");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
|
|
||||||
* 'allowed' is still present.
|
|
||||||
*
|
|
||||||
* @param rods Reference-ordered data to load.
|
|
||||||
*/
|
|
||||||
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
|
|
||||||
// Check to make sure that all required metadata is present.
|
|
||||||
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
|
|
||||||
for (RMD required : allRequired) {
|
|
||||||
boolean found = false;
|
|
||||||
for (ReferenceOrderedDataSource rod : rods) {
|
|
||||||
if (rod.matchesNameAndRecordType(required.name(), required.type()))
|
|
||||||
found = true;
|
|
||||||
}
|
|
||||||
if (!found)
|
|
||||||
throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
|
|
||||||
"Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check to see that no forbidden rods are present.
|
|
||||||
for (ReferenceOrderedDataSource rod : rods) {
|
|
||||||
if (!WalkerManager.isAllowed(walker, rod))
|
|
||||||
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void validateSuppliedIntervals() {
|
protected void validateSuppliedIntervals() {
|
||||||
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
|
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
|
||||||
if(!(walker instanceof ReadWalker)) {
|
if(!(walker instanceof ReadWalker)) {
|
||||||
|
|
@ -716,8 +689,6 @@ public class GenomeAnalysisEngine {
|
||||||
validateSuppliedReads();
|
validateSuppliedReads();
|
||||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
||||||
|
|
||||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
|
||||||
|
|
||||||
for (ReadFilter filter : filters)
|
for (ReadFilter filter : filters)
|
||||||
filter.initialize(this);
|
filter.initialize(this);
|
||||||
|
|
||||||
|
|
@ -926,9 +897,6 @@ public class GenomeAnalysisEngine {
|
||||||
GenomeLocParser genomeLocParser,
|
GenomeLocParser genomeLocParser,
|
||||||
ValidationExclusion.TYPE validationExclusionType) {
|
ValidationExclusion.TYPE validationExclusionType) {
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
|
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
|
||||||
// try and make the tracks given their requests
|
|
||||||
// create of live instances of the tracks
|
|
||||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
|
||||||
|
|
||||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||||
|
|
@ -939,7 +907,6 @@ public class GenomeAnalysisEngine {
|
||||||
flashbackData()));
|
flashbackData()));
|
||||||
|
|
||||||
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
|
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
|
||||||
validateSuppliedReferenceOrderedData(dataSources);
|
|
||||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
|
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
|
||||||
|
|
||||||
return dataSources;
|
return dataSources;
|
||||||
|
|
@ -994,7 +961,7 @@ public class GenomeAnalysisEngine {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the list of intervals passed to the engine.
|
* Get the list of intervals passed to the engine.
|
||||||
* @return List of intervals.
|
* @return List of intervals, or null if no intervals are in use
|
||||||
*/
|
*/
|
||||||
public GenomeLocSortedSet getIntervals() {
|
public GenomeLocSortedSet getIntervals() {
|
||||||
return this.intervals;
|
return this.intervals;
|
||||||
|
|
|
||||||
|
|
@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
|
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
|
||||||
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
|
|
||||||
import org.broadinstitute.sting.utils.help.SummaryTaglet;
|
|
||||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return A suitable display name for the package.
|
* @return A suitable display name for the package.
|
||||||
*/
|
*/
|
||||||
public String getPackageDisplayName(String packageName) {
|
public String getPackageDisplayName(String packageName) {
|
||||||
// Try to find an override for the display name of this package.
|
// ...try to compute the override from the text of the package name, while accounting for
|
||||||
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
|
// unpackaged walkers.
|
||||||
String displayName;
|
String displayName = packageName.substring(packageName.lastIndexOf('.')+1);
|
||||||
if(helpText.containsKey(displayNameKey)) {
|
if (displayName.trim().equals("")) displayName = "<unpackaged>";
|
||||||
displayName = helpText.getString(displayNameKey);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// If no override exists...
|
|
||||||
// ...try to compute the override from the text of the package name, while accounting for
|
|
||||||
// unpackaged walkers.
|
|
||||||
displayName = packageName.substring(packageName.lastIndexOf('.')+1);
|
|
||||||
if(displayName.trim().equals("")) displayName = "<unpackaged>";
|
|
||||||
}
|
|
||||||
return displayName;
|
return displayName;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return Package help text, or "" if none exists.
|
* @return Package help text, or "" if none exists.
|
||||||
*/
|
*/
|
||||||
public String getPackageSummaryText(String packageName) {
|
public String getPackageSummaryText(String packageName) {
|
||||||
String key = String.format("%s.%s",packageName,SummaryTaglet.NAME);
|
String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
|
||||||
if(!helpText.containsKey(key))
|
if(!helpText.containsKey(key))
|
||||||
return "";
|
return "";
|
||||||
return helpText.getString(key);
|
return helpText.getString(key);
|
||||||
|
|
@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return Walker summary description, or "" if none exists.
|
* @return Walker summary description, or "" if none exists.
|
||||||
*/
|
*/
|
||||||
public String getWalkerSummaryText(Class<? extends Walker> walkerType) {
|
public String getWalkerSummaryText(Class<? extends Walker> walkerType) {
|
||||||
String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME);
|
String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
|
||||||
if(!helpText.containsKey(walkerSummary))
|
if(!helpText.containsKey(walkerSummary))
|
||||||
return "";
|
return "";
|
||||||
return helpText.getString(walkerSummary);
|
return helpText.getString(walkerSummary);
|
||||||
|
|
@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return Walker full description, or "" if none exists.
|
* @return Walker full description, or "" if none exists.
|
||||||
*/
|
*/
|
||||||
public String getWalkerDescriptionText(Class<? extends Walker> walkerType) {
|
public String getWalkerDescriptionText(Class<? extends Walker> walkerType) {
|
||||||
String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME);
|
String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME);
|
||||||
if(!helpText.containsKey(walkerDescription))
|
if(!helpText.containsKey(walkerDescription))
|
||||||
return "";
|
return "";
|
||||||
return helpText.getString(walkerDescription);
|
return helpText.getString(walkerDescription);
|
||||||
|
|
@ -188,19 +177,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return The list of allowed reference meta data.
|
* @return The list of allowed reference meta data.
|
||||||
*/
|
*/
|
||||||
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
|
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
|
||||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
return Collections.<RMD>emptyList();
|
||||||
if (allowsDataSource == null)
|
|
||||||
return Collections.<RMD>emptyList();
|
|
||||||
return Arrays.asList(allowsDataSource.referenceMetaData());
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get a list of RODs allowed by the walker.
|
|
||||||
* @param walker Walker to query.
|
|
||||||
* @return The list of allowed reference meta data.
|
|
||||||
*/
|
|
||||||
public static List<RMD> getAllowsMetaData(Walker walker) {
|
|
||||||
return getAllowsMetaData(walker.getClass());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -237,24 +214,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return True if the walker forbids this data type. False otherwise.
|
* @return True if the walker forbids this data type. False otherwise.
|
||||||
*/
|
*/
|
||||||
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
|
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
|
||||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
return true;
|
||||||
|
|
||||||
// Allows is less restrictive than requires. If an allows
|
|
||||||
// clause is not specified, any kind of data is allowed.
|
|
||||||
if( allowsDataSource == null )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
// The difference between unspecified RMD and the empty set of metadata can't be detected.
|
|
||||||
// Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
|
|
||||||
// case in the future.
|
|
||||||
if( allowsDataSource.referenceMetaData().length == 0 )
|
|
||||||
return true;
|
|
||||||
|
|
||||||
for( RMD allowed: allowsDataSource.referenceMetaData() ) {
|
|
||||||
if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -294,8 +254,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
||||||
* @return The list of required reference meta data.
|
* @return The list of required reference meta data.
|
||||||
*/
|
*/
|
||||||
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
|
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
|
||||||
Requires requiresDataSource = getWalkerRequirements(walkerClass);
|
return Collections.emptyList();
|
||||||
return Arrays.asList(requiresDataSource.referenceMetaData());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -23,8 +23,26 @@
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.simpleframework.xml.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* @version 1.0
|
||||||
|
*/
|
||||||
|
@Root
|
||||||
|
public class DbsnpArgumentCollection {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A dbSNP VCF file.
|
||||||
|
*/
|
||||||
|
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
|
||||||
|
public RodBinding<VariantContext> dbsnp;
|
||||||
|
|
||||||
public interface ReferenceDataSourceProgressListener {
|
|
||||||
public void percentProgress(int percent);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -101,6 +101,8 @@ public class GATKArgumentCollection {
|
||||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||||
public File referenceFile = null;
|
public File referenceFile = null;
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
|
@Hidden
|
||||||
@ElementList(required = false)
|
@ElementList(required = false)
|
||||||
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
||||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||||
|
|
@ -117,11 +119,6 @@ public class GATKArgumentCollection {
|
||||||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||||
public boolean nonDeterministicRandomSeed = false;
|
public boolean nonDeterministicRandomSeed = false;
|
||||||
|
|
||||||
|
|
||||||
@Element(required = false)
|
|
||||||
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
|
||||||
public String DBSNPFile = null;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The override mechanism in the GATK, by default, populates the command-line arguments, then
|
* The override mechanism in the GATK, by default, populates the command-line arguments, then
|
||||||
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
|
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
|
||||||
|
|
@ -345,14 +342,6 @@ public class GATKArgumentCollection {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (other.RODBindings.size() != RODBindings.size()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
for (int x = 0; x < RODBindings.size(); x++) {
|
|
||||||
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!other.samFiles.equals(this.samFiles)) {
|
if (!other.samFiles.equals(this.samFiles)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -380,9 +369,6 @@ public class GATKArgumentCollection {
|
||||||
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
if (!other.unsafe.equals(this.unsafe)) {
|
if (!other.unsafe.equals(this.unsafe)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,49 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.arguments;
|
||||||
|
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
import org.simpleframework.xml.Root;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @author ebanks
|
||||||
|
* @version 1.0
|
||||||
|
*/
|
||||||
|
@Root
|
||||||
|
public class StandardVariantContextInputArgumentCollection {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Variants from this VCF file are used by this tool as input.
|
||||||
|
* The file must at least contain the standard VCF header lines, but
|
||||||
|
* can be empty (i.e., no variants are contained in the file).
|
||||||
|
*/
|
||||||
|
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||||
|
public RodBinding<VariantContext> variants;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -1,8 +1,10 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
||||||
* @param loc Locus at which to track.
|
* @param loc Locus at which to track.
|
||||||
* @return A tracker containing information about this locus.
|
* @return A tracker containing information about this locus.
|
||||||
*/
|
*/
|
||||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||||
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size());
|
List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
|
||||||
|
|
||||||
for ( ReferenceOrderedDataState state: states )
|
for ( ReferenceOrderedDataState state: states )
|
||||||
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
|
// todo -- warning, I removed the reference to the name from states
|
||||||
return tracks;
|
bindings.add( state.iterator.seekForward(loc) );
|
||||||
|
|
||||||
|
return new RefMetaDataTracker(bindings, referenceContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -1,8 +1,9 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
public interface ReferenceOrderedView extends View {
|
public interface ReferenceOrderedView extends View {
|
||||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
|
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.providers;
|
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
|
|
@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
*/
|
*/
|
||||||
private RODMergingIterator rodQueue = null;
|
private RODMergingIterator rodQueue = null;
|
||||||
|
|
||||||
RefMetaDataTracker tracker = null;
|
Collection<RODRecordList> allTracksHere;
|
||||||
|
|
||||||
GenomeLoc lastLoc = null;
|
GenomeLoc lastLoc = null;
|
||||||
RODRecordList interval = null;
|
RODRecordList interval = null;
|
||||||
|
|
||||||
|
|
@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
}
|
}
|
||||||
|
|
||||||
rodQueue = new RODMergingIterator(iterators);
|
rodQueue = new RODMergingIterator(iterators);
|
||||||
|
|
||||||
//throw new StingException("RodLocusView currently disabled");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||||
return tracker;
|
// special case the interval again -- add it into the ROD
|
||||||
|
if ( interval != null ) { allTracksHere.add(interval); }
|
||||||
|
return new RefMetaDataTracker(allTracksHere, referenceContext);
|
||||||
}
|
}
|
||||||
|
|
||||||
public boolean hasNext() {
|
public boolean hasNext() {
|
||||||
|
|
@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
|
|
||||||
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
|
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
|
||||||
|
|
||||||
// Update the tracker here for use
|
allTracksHere = getSpanningTracks(datum);
|
||||||
Collection<RODRecordList> allTracksHere = getSpanningTracks(datum);
|
|
||||||
tracker = createTracker(allTracksHere);
|
|
||||||
|
|
||||||
GenomeLoc rodSite = datum.getLocation();
|
GenomeLoc rodSite = datum.getLocation();
|
||||||
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
|
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
|
||||||
|
|
||||||
|
|
@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
|
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
|
||||||
}
|
}
|
||||||
|
|
||||||
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
|
|
||||||
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
|
|
||||||
for ( RODRecordList track : allTracksHere ) {
|
|
||||||
if ( ! t.hasROD(track.getName()) )
|
|
||||||
t.bind(track.getName(), track);
|
|
||||||
}
|
|
||||||
|
|
||||||
// special case the interval again -- add it into the ROD
|
|
||||||
if ( interval != null ) { t.bind(interval.getName(), interval); }
|
|
||||||
|
|
||||||
return t;
|
|
||||||
}
|
|
||||||
|
|
||||||
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
|
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
|
||||||
return rodQueue.allElementsLTE(marker);
|
return rodQueue.allElementsLTE(marker);
|
||||||
}
|
}
|
||||||
|
|
@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
return getSkippedBases(getLocOneBeyondShard());
|
return getSkippedBases(getLocOneBeyondShard());
|
||||||
}
|
}
|
||||||
|
|
||||||
public RefMetaDataTracker getTracker() {
|
|
||||||
return tracker;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Closes the current view.
|
* Closes the current view.
|
||||||
*/
|
*/
|
||||||
|
|
@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
||||||
state.dataSource.close( state.iterator );
|
state.dataSource.close( state.iterator );
|
||||||
|
|
||||||
rodQueue = null;
|
rodQueue = null;
|
||||||
tracker = null;
|
allTracksHere = null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
||||||
*/
|
*/
|
||||||
public FilePointer next() {
|
public FilePointer next() {
|
||||||
FilePointer current = wrappedIterator.next();
|
FilePointer current = wrappedIterator.next();
|
||||||
//while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
||||||
// current = current.combine(parser,wrappedIterator.next());
|
current = current.combine(parser,wrappedIterator.next());
|
||||||
return current;
|
return current;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -893,6 +893,7 @@ public class SAMDataSource {
|
||||||
* Custom representation of interval bounds.
|
* Custom representation of interval bounds.
|
||||||
* Makes it simpler to track current position.
|
* Makes it simpler to track current position.
|
||||||
*/
|
*/
|
||||||
|
private int[] intervalContigIndices;
|
||||||
private int[] intervalStarts;
|
private int[] intervalStarts;
|
||||||
private int[] intervalEnds;
|
private int[] intervalEnds;
|
||||||
|
|
||||||
|
|
@ -917,12 +918,14 @@ public class SAMDataSource {
|
||||||
if(foundMappedIntervals) {
|
if(foundMappedIntervals) {
|
||||||
if(keepOnlyUnmappedReads)
|
if(keepOnlyUnmappedReads)
|
||||||
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
|
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
|
||||||
|
this.intervalContigIndices = new int[intervals.size()];
|
||||||
this.intervalStarts = new int[intervals.size()];
|
this.intervalStarts = new int[intervals.size()];
|
||||||
this.intervalEnds = new int[intervals.size()];
|
this.intervalEnds = new int[intervals.size()];
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for(GenomeLoc interval: intervals) {
|
for(GenomeLoc interval: intervals) {
|
||||||
intervalStarts[i] = (int)interval.getStart();
|
intervalContigIndices[i] = interval.getContigIndex();
|
||||||
intervalEnds[i] = (int)interval.getStop();
|
intervalStarts[i] = interval.getStart();
|
||||||
|
intervalEnds[i] = interval.getStop();
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -961,11 +964,10 @@ public class SAMDataSource {
|
||||||
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
|
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
|
||||||
if(!keepOnlyUnmappedReads) {
|
if(!keepOnlyUnmappedReads) {
|
||||||
// Mapped read filter; check against GenomeLoc-derived bounds.
|
// Mapped read filter; check against GenomeLoc-derived bounds.
|
||||||
if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
if(readEndsOnOrAfterStartingBound(candidateRead)) {
|
||||||
(candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
|
// This read ends after the current interval begins.
|
||||||
// This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
|
|
||||||
// Promising, but this read must be checked against the ending bound.
|
// Promising, but this read must be checked against the ending bound.
|
||||||
if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
|
if(readStartsOnOrBeforeEndingBound(candidateRead)) {
|
||||||
// Yes, this read is within both bounds. This must be our next read.
|
// Yes, this read is within both bounds. This must be our next read.
|
||||||
nextRead = candidateRead;
|
nextRead = candidateRead;
|
||||||
break;
|
break;
|
||||||
|
|
@ -993,6 +995,37 @@ public class SAMDataSource {
|
||||||
candidateRead = iterator.next();
|
candidateRead = iterator.next();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its
|
||||||
|
* end will be distorted, so rely only on the alignment start.
|
||||||
|
* @param read The read to position-check.
|
||||||
|
* @return True if the read ends on or after the start of the current bound (or, if unmapped but placed, starts on or after it). False otherwise.
|
||||||
|
*/
|
||||||
|
private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
|
||||||
|
return
|
||||||
|
// Read ends on a later contig, or...
|
||||||
|
read.getReferenceIndex() > intervalContigIndices[currentBound] ||
|
||||||
|
// Read ends on this contig...
|
||||||
|
(read.getReferenceIndex() == intervalContigIndices[currentBound] &&
|
||||||
|
// either after this location, or...
|
||||||
|
(read.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
||||||
|
// read is unmapped but positioned and alignment start is on or after this start point.
|
||||||
|
(read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether the read lies before the end of the current bound.
|
||||||
|
* @param read The read to position-check.
|
||||||
|
* @return True if the read starts on or before the end of the current bound. False otherwise.
|
||||||
|
*/
|
||||||
|
private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
|
||||||
|
return
|
||||||
|
// Read starts on a prior contig, or...
|
||||||
|
read.getReferenceIndex() < intervalContigIndices[currentBound] ||
|
||||||
|
// Read starts on this contig and the alignment start is registered before this end point.
|
||||||
|
(read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -41,7 +41,7 @@ import java.io.File;
|
||||||
* Loads reference data from fasta file
|
* Loads reference data from fasta file
|
||||||
* Looks for fai and dict files, and tries to create them if they don't exist
|
* Looks for fai and dict files, and tries to create them if they don't exist
|
||||||
*/
|
*/
|
||||||
public class ReferenceDataSource implements ReferenceDataSourceProgressListener {
|
public class ReferenceDataSource {
|
||||||
private IndexedFastaSequenceFile index;
|
private IndexedFastaSequenceFile index;
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
/** our log, which we want to capture anything from this class */
|
||||||
|
|
@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
||||||
// get exclusive lock
|
// get exclusive lock
|
||||||
if (!indexLock.exclusiveLock())
|
if (!indexLock.exclusiveLock())
|
||||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
||||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this);
|
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
|
||||||
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
||||||
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
||||||
}
|
}
|
||||||
|
|
@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
||||||
public IndexedFastaSequenceFile getReference() {
|
public IndexedFastaSequenceFile getReference() {
|
||||||
return this.index;
|
return this.index;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Notify user of progress in creating fai file
|
|
||||||
* @param percent Percent of fasta file read as a percent
|
|
||||||
*/
|
|
||||||
public void percentProgress(int percent) {
|
|
||||||
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
||||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
|
||||||
}
|
}
|
||||||
|
|
||||||
public Class getType() {
|
public Class getType() {
|
||||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
|
||||||
}
|
}
|
||||||
|
|
||||||
public Class getRecordType() {
|
public Class getRecordType() {
|
||||||
return builder.createCodec(getType(),getName()).getFeatureType();
|
return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
|
||||||
}
|
}
|
||||||
|
|
||||||
public File getFile() {
|
public File getFile() {
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,82 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.examples;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* [Short one sentence description of this walker]
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* [Functionality of this walker]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* [Input description]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* [Output description]
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -T $WalkerName
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author Your Name
|
||||||
|
* @since Date created
|
||||||
|
*/
|
||||||
|
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
||||||
|
/**
|
||||||
|
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
||||||
|
* in doc annotation field, as that will be added before this text in the documentation page.
|
||||||
|
*
|
||||||
|
* Notes:
|
||||||
|
* <ul>
|
||||||
|
* <li>This field can contain HTML as a normal javadoc</li>
|
||||||
|
* <li>Don't include information about the default value, as gatkdocs adds this automatically</li>
|
||||||
|
* <li>Try your best to describe in detail the behavior of the argument, as ultimately confusing
|
||||||
|
* docs here will just result in user posts on the forum</li>
|
||||||
|
* </ul>
|
||||||
|
*/
|
||||||
|
@Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false)
|
||||||
|
private boolean myWalkerArgument = false;
|
||||||
|
|
||||||
|
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }
|
||||||
|
public Integer reduceInit() { return 0; }
|
||||||
|
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||||
|
public void onTraversalDone(Integer result) { }
|
||||||
|
}
|
||||||
|
|
@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
||||||
if (!( walker instanceof TreeReducible ))
|
if (!( walker instanceof TreeReducible ))
|
||||||
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
|
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
|
||||||
|
|
||||||
traversalEngine.startTimers();
|
|
||||||
ReduceTree reduceTree = new ReduceTree(this);
|
ReduceTree reduceTree = new ReduceTree(this);
|
||||||
initializeWalker(walker);
|
initializeWalker(walker);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -44,15 +44,16 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
* @param shardStrategy A strategy for sharding the data.
|
* @param shardStrategy A strategy for sharding the data.
|
||||||
*/
|
*/
|
||||||
public Object execute(Walker walker, ShardStrategy shardStrategy) {
|
public Object execute(Walker walker, ShardStrategy shardStrategy) {
|
||||||
traversalEngine.startTimers();
|
|
||||||
walker.initialize();
|
walker.initialize();
|
||||||
Accumulator accumulator = Accumulator.create(engine,walker);
|
Accumulator accumulator = Accumulator.create(engine,walker);
|
||||||
|
|
||||||
|
boolean done = walker.isDone();
|
||||||
int counter = 0;
|
int counter = 0;
|
||||||
for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) {
|
for (Shard shard : shardStrategy ) {
|
||||||
if ( shard == null ) // we ran out of shards that aren't owned
|
if ( done || shard == null ) // we ran out of shards that aren't owned
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
traversalEngine.startTimersIfNecessary();
|
||||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||||
LocusWalker lWalker = (LocusWalker)walker;
|
LocusWalker lWalker = (LocusWalker)walker;
|
||||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
||||||
|
|
@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||||
accumulator.accumulate(dataProvider,result);
|
accumulator.accumulate(dataProvider,result);
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
|
if ( walker.isDone() ) break;
|
||||||
}
|
}
|
||||||
windowMaker.close();
|
windowMaker.close();
|
||||||
}
|
}
|
||||||
|
|
@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler {
|
||||||
accumulator.accumulate(dataProvider,result);
|
accumulator.accumulate(dataProvider,result);
|
||||||
dataProvider.close();
|
dataProvider.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
Object result = accumulator.finishTraversal();
|
Object result = accumulator.finishTraversal();
|
||||||
|
|
|
||||||
|
|
@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*;
|
||||||
import org.broadinstitute.sting.gatk.walkers.*;
|
import org.broadinstitute.sting.gatk.walkers.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.threading.*;
|
|
||||||
|
|
||||||
import javax.management.JMException;
|
import javax.management.JMException;
|
||||||
import javax.management.MBeanServer;
|
import javax.management.MBeanServer;
|
||||||
import javax.management.ObjectName;
|
import javax.management.ObjectName;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.PrintStream;
|
|
||||||
import java.lang.management.ManagementFactory;
|
import java.lang.management.ManagementFactory;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
|
|
||||||
|
|
@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
private final MBeanServer mBeanServer;
|
private final MBeanServer mBeanServer;
|
||||||
private final ObjectName mBeanName;
|
private final ObjectName mBeanName;
|
||||||
|
|
||||||
protected GenomeLocProcessingTracker processingTracker;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||||
* selected walker.
|
* selected walker.
|
||||||
|
|
@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
* @return The best-fit microscheduler.
|
* @return The best-fit microscheduler.
|
||||||
*/
|
*/
|
||||||
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
|
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
|
||||||
if (engine.getArguments().processingTrackerFile != null) {
|
|
||||||
if ( walker instanceof ReadWalker )
|
|
||||||
throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers"));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
|
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
|
||||||
if(walker.isReduceByInterval())
|
if(walker.isReduceByInterval())
|
||||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||||
|
|
@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
||||||
catch (JMException ex) {
|
catch (JMException ex) {
|
||||||
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
|
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
|
||||||
}
|
}
|
||||||
|
|
||||||
//
|
|
||||||
// create the processing tracker
|
|
||||||
//
|
|
||||||
if ( engine.getArguments().processingTrackerFile != null ) {
|
|
||||||
logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably.");
|
|
||||||
if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) {
|
|
||||||
engine.getArguments().processingTrackerFile.delete();
|
|
||||||
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
PrintStream statusStream = null;
|
|
||||||
if ( engine.getArguments().processingTrackerStatusFile != null ) {
|
|
||||||
try {
|
|
||||||
statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
|
|
||||||
} catch ( FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID);
|
|
||||||
processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ;
|
|
||||||
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID);
|
|
||||||
} else {
|
|
||||||
// create a NoOp version that doesn't do anything but say "yes"
|
|
||||||
processingTracker = new NoOpGenomeLocProcessingTracker();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
|
||||||
|
|
||||||
public Object call() {
|
public Object call() {
|
||||||
try {
|
try {
|
||||||
|
traversalEngine.startTimersIfNecessary();
|
||||||
long startTime = System.currentTimeMillis();
|
long startTime = System.currentTimeMillis();
|
||||||
|
|
||||||
Object accumulator = walker.reduceInit();
|
Object accumulator = walker.reduceInit();
|
||||||
|
|
|
||||||
|
|
@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
* Filter out FailsVendorQualityCheck reads.
|
* Filter out FailsVendorQualityCheck reads.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class FailsVendorQualityCheckReadFilter extends ReadFilter {
|
public class FailsVendorQualityCheckFilter extends ReadFilter {
|
||||||
public boolean filterOut( final SAMRecord read ) {
|
public boolean filterOut( final SAMRecord read ) {
|
||||||
return read.getReadFailsVendorQualityCheckFlag();
|
return read.getReadFailsVendorQualityCheckFlag();
|
||||||
}
|
}
|
||||||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MappingQualityReadFilter extends ReadFilter {
|
public class MappingQualityFilter extends ReadFilter {
|
||||||
|
|
||||||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||||
public int MIN_MAPPING_QUALTY_SCORE = 10;
|
public int MIN_MAPPING_QUALTY_SCORE = 10;
|
||||||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MappingQualityUnavailableReadFilter extends ReadFilter {
|
public class MappingQualityUnavailableFilter extends ReadFilter {
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
|
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
|
||||||
}
|
}
|
||||||
|
|
@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class MappingQualityZeroReadFilter extends ReadFilter {
|
public class MappingQualityZeroFilter extends ReadFilter {
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
return (rec.getMappingQuality() == 0);
|
return (rec.getMappingQuality() == 0);
|
||||||
}
|
}
|
||||||
|
|
@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
|
||||||
* Filter out reads that are not primary alignments.
|
* Filter out reads that are not primary alignments.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class NotPrimaryAlignmentReadFilter extends ReadFilter {
|
public class NotPrimaryAlignmentFilter extends ReadFilter {
|
||||||
public boolean filterOut( final SAMRecord read ) {
|
public boolean filterOut( final SAMRecord read ) {
|
||||||
return read.getNotPrimaryAlignmentFlag();
|
return read.getNotPrimaryAlignmentFlag();
|
||||||
}
|
}
|
||||||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||||
* @version 0.1
|
* @version 0.1
|
||||||
*/
|
*/
|
||||||
public class PlatformFilter extends ReadFilter {
|
public class PlatformFilter extends ReadFilter {
|
||||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
|
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
|
||||||
protected String[] PLFilterNames;
|
protected String[] PLFilterNames;
|
||||||
|
|
||||||
public boolean filterOut(SAMRecord rec) {
|
public boolean filterOut(SAMRecord rec) {
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A SamRecordFilter that also depends on the header.
|
* A SamRecordFilter that also depends on the header.
|
||||||
*/
|
*/
|
||||||
|
@DocumentedGATKFeature(
|
||||||
|
groupName = "Read filters",
|
||||||
|
summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" )
|
||||||
public abstract class ReadFilter implements SamRecordFilter {
|
public abstract class ReadFilter implements SamRecordFilter {
|
||||||
/**
|
/**
|
||||||
* Sets the header for use by this filter.
|
* Sets the header for use by this filter.
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,72 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2009 The Broad Institute
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.filters;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMRecord;
|
||||||
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A read filter (transformer) that sets the mapping quality of every read to a given value.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set
|
||||||
|
* all your mapping qualities to a given value. The default value is 60.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* BAM file(s)
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* BAM file(s) with the mapping quality of every read reassigned
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java
|
||||||
|
* -jar GenomeAnalysisTK.jar
|
||||||
|
* -rf ReassignMappingQuality
|
||||||
|
* -DMQ 35
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* @author carneiro
|
||||||
|
* @since 8/8/11
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class ReassignMappingQualityFilter extends ReadFilter {
|
||||||
|
|
||||||
|
@Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
|
||||||
|
public int defaultMappingQuality = 60;
|
||||||
|
|
||||||
|
public boolean filterOut(SAMRecord rec) {
|
||||||
|
rec.setMappingQuality(defaultMappingQuality);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
writer.writeHeader(stub.getVCFHeader());
|
writer.writeHeader(stub.getVCFHeader());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void add(VariantContext vc, byte ref) {
|
public void add(VariantContext vc) {
|
||||||
writer.add(vc, ref);
|
writer.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
||||||
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
|
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
|
||||||
|
|
||||||
for ( VariantContext vc : source.iterator() ) {
|
for ( VariantContext vc : source.iterator() ) {
|
||||||
target.writer.add(vc, vc.getReferenceBaseForIndel());
|
target.writer.add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
source.close();
|
source.close();
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.lang.reflect.Constructor;
|
import java.lang.reflect.Constructor;
|
||||||
|
import java.lang.reflect.Type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementation.
|
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementation.
|
||||||
|
|
@ -69,16 +70,21 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) {
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
return "stdout";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||||
if(!source.isRequired())
|
if(!source.isRequired())
|
||||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||||
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
|
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
|
||||||
engine.addOutput(stub);
|
engine.addOutput(stub);
|
||||||
return createInstanceOfClass(type,stub);
|
return createInstanceOfClass((Class)type,stub);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||||
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
|
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
|
||||||
String fileName = getArgumentValue( definition, matches );
|
String fileName = getArgumentValue( definition, matches );
|
||||||
|
|
||||||
|
|
@ -91,7 +97,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
|
|
||||||
engine.addOutput(stub);
|
engine.addOutput(stub);
|
||||||
|
|
||||||
Object result = createInstanceOfClass(type,stub);
|
Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
|
||||||
// WARNING: Side effects required by engine!
|
// WARNING: Side effects required by engine!
|
||||||
parsingEngine.addTags(result,getArgumentTags(matches));
|
parsingEngine.addTags(result,getArgumentTags(matches));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
import java.lang.reflect.Type;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Describe how to parse SAMFileReaders.
|
* Describe how to parse SAMFileReaders.
|
||||||
|
|
@ -52,14 +53,13 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
this.engine = engine;
|
this.engine = engine;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean supports( Class type ) {
|
public boolean supports( Class type ) {
|
||||||
return SAMFileReader.class.isAssignableFrom(type);
|
return SAMFileReader.class.isAssignableFrom(type);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||||
|
|
||||||
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.lang.annotation.Annotation;
|
import java.lang.annotation.Annotation;
|
||||||
|
import java.lang.reflect.Type;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -93,7 +94,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
return "stdout";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||||
if(!source.isRequired())
|
if(!source.isRequired())
|
||||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||||
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
|
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
|
||||||
|
|
@ -102,7 +108,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||||
// Extract all possible parameters that could be passed to a BAM file writer?
|
// Extract all possible parameters that could be passed to a BAM file writer?
|
||||||
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
|
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
|
||||||
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );
|
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.lang.reflect.Type;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
|
@ -108,7 +109,12 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
public String typeDefaultDocString(ArgumentSource source) {
|
||||||
|
return "stdout";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||||
if(!source.isRequired())
|
if(!source.isRequired())
|
||||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||||
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
|
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
|
||||||
|
|
@ -124,7 +130,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||||
* @return Transform from the matches into the associated argument.
|
* @return Transform from the matches into the associated argument.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||||
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
|
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
|
||||||
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
|
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
|
||||||
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
|
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.io.stubs;
|
package org.broadinstitute.sting.gatk.io.stubs;
|
||||||
|
|
||||||
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||||
|
|
@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||||
vcfHeader = header;
|
vcfHeader = header;
|
||||||
|
|
||||||
// Check for the command-line argument header line. If not present, add it in.
|
// Check for the command-line argument header line. If not present, add it in.
|
||||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
if ( !skipWritingHeader ) {
|
||||||
boolean foundCommandLineHeaderLine = false;
|
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||||
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
|
boolean foundCommandLineHeaderLine = false;
|
||||||
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
|
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||||
foundCommandLineHeaderLine = true;
|
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
|
||||||
|
foundCommandLineHeaderLine = true;
|
||||||
|
}
|
||||||
|
if ( !foundCommandLineHeaderLine )
|
||||||
|
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||||
|
|
||||||
|
// also put in the reference contig header lines
|
||||||
|
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
|
||||||
|
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
|
||||||
|
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
|
||||||
|
|
||||||
|
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
|
||||||
}
|
}
|
||||||
if(!foundCommandLineHeaderLine && !skipWritingHeader)
|
|
||||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
|
||||||
|
|
||||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||||
}
|
}
|
||||||
|
|
@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||||
/**
|
/**
|
||||||
* {@inheritDoc}
|
* {@inheritDoc}
|
||||||
*/
|
*/
|
||||||
public void add(VariantContext vc, byte ref) {
|
public void add(VariantContext vc) {
|
||||||
outputTracker.getStorage(this).add(vc,ref);
|
outputTracker.getStorage(this).add(vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
||||||
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
||||||
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
|
||||||
|
String val;
|
||||||
|
if ( assembly != null )
|
||||||
|
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
|
||||||
|
else
|
||||||
|
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
|
||||||
|
return new VCFHeaderLine("contig", val);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getReferenceAssembly(String refPath) {
|
||||||
|
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||||
|
String assembly = null;
|
||||||
|
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
|
||||||
|
assembly = "b37";
|
||||||
|
else if ( refPath.indexOf("b36") != -1 )
|
||||||
|
assembly = "b36";
|
||||||
|
else if ( refPath.indexOf("hg18") != -1 )
|
||||||
|
assembly = "hg18";
|
||||||
|
else if ( refPath.indexOf("hg19") != -1 )
|
||||||
|
assembly = "hg19";
|
||||||
|
return assembly;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format;
|
||||||
import org.simpleframework.xml.stream.HyphenStyle;
|
import org.simpleframework.xml.stream.HyphenStyle;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
import java.net.InetAddress;
|
|
||||||
import java.security.NoSuchAlgorithmException;
|
import java.security.NoSuchAlgorithmException;
|
||||||
import java.text.DateFormat;
|
import java.text.DateFormat;
|
||||||
import java.text.SimpleDateFormat;
|
import java.text.SimpleDateFormat;
|
||||||
|
|
@ -154,9 +153,13 @@ public class GATKRunReport {
|
||||||
private long nReads;
|
private long nReads;
|
||||||
|
|
||||||
public enum PhoneHomeOption {
|
public enum PhoneHomeOption {
|
||||||
|
/** Disable phone home */
|
||||||
NO_ET,
|
NO_ET,
|
||||||
|
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
|
||||||
STANDARD,
|
STANDARD,
|
||||||
|
/** Force output to STDOUT. For debugging only */
|
||||||
STDOUT,
|
STDOUT,
|
||||||
|
/** Force output to S3. For debugging only */
|
||||||
AWS_S3 // todo -- remove me -- really just for testing purposes
|
AWS_S3 // todo -- remove me -- really just for testing purposes
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -226,22 +229,6 @@ public class GATKRunReport {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper utility that calls into the InetAddress system to resolve the hostname. If this fails,
|
|
||||||
* unresolvable gets returned instead.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private String resolveHostname() {
|
|
||||||
try {
|
|
||||||
return InetAddress.getLocalHost().getCanonicalHostName();
|
|
||||||
}
|
|
||||||
catch (java.net.UnknownHostException uhe) { // [beware typo in code sample -dmw]
|
|
||||||
return "unresolvable";
|
|
||||||
// handle exception
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void postReport(PhoneHomeOption type) {
|
public void postReport(PhoneHomeOption type) {
|
||||||
logger.debug("Posting report of type " + type);
|
logger.debug("Posting report of type " + type);
|
||||||
switch (type) {
|
switch (type) {
|
||||||
|
|
@ -321,7 +308,7 @@ public class GATKRunReport {
|
||||||
|
|
||||||
private void postReportToAWSS3() {
|
private void postReportToAWSS3() {
|
||||||
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
||||||
this.hostName = resolveHostname(); // we want to fill in the host name
|
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
||||||
File localFile = postReportToLocalDisk(new File("./"));
|
File localFile = postReportToLocalDisk(new File("./"));
|
||||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||||
if ( localFile != null ) { // we succeeded in creating the local file
|
if ( localFile != null ) { // we succeeded in creating the local file
|
||||||
|
|
|
||||||
|
|
@ -1,238 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.lang.reflect.Constructor;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.regex.Pattern;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
|
|
||||||
* files. The only assumption made is that every line in the file provides a complete and separate data record. The records
|
|
||||||
* can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
|
|
||||||
* position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
|
|
||||||
* otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
|
|
||||||
* provided that is capable of parsing itself from a single line of data. This implementation will be used,
|
|
||||||
* through reflection mechanism, as a callback to do all the work.
|
|
||||||
*
|
|
||||||
* The model is, hence, as follows:
|
|
||||||
*
|
|
||||||
* String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
|
|
||||||
* an instance of RodImplementation with data values from dataRecord line).
|
|
||||||
*
|
|
||||||
*
|
|
||||||
* instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
|
|
||||||
* that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
|
|
||||||
* representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
|
|
||||||
* track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
|
|
||||||
* multiple streams of (possibly heterogeneous) annotation data bound to an application.
|
|
||||||
*
|
|
||||||
* This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
|
|
||||||
* to the ROD implementation class for processing.
|
|
||||||
*
|
|
||||||
* This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate,
|
|
||||||
* and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
|
|
||||||
* iterators/wrappers will perform all the necessary checks.
|
|
||||||
*
|
|
||||||
* Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
|
|
||||||
* mechanism is as follows:
|
|
||||||
* 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
|
|
||||||
* and return the header object (to be kept by the iterator)
|
|
||||||
* 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
|
|
||||||
* and
|
|
||||||
* 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
|
|
||||||
* method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
|
|
||||||
* other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
|
|
||||||
*
|
|
||||||
* Created by IntelliJ IDEA.
|
|
||||||
* User: asivache
|
|
||||||
* Date: Sep 10, 2009
|
|
||||||
* Time: 1:22:23 PM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
|
|
||||||
|
|
||||||
private PushbackIterator<String> reader;
|
|
||||||
|
|
||||||
// stores name of the track this iterator reads (will be also returned by getName() of ROD objects
|
|
||||||
// generated by this iterator)
|
|
||||||
private String name;
|
|
||||||
|
|
||||||
// we keep the file object, only to use file name in error reports
|
|
||||||
private File file;
|
|
||||||
|
|
||||||
// rod type; this is what we will instantiate for RODs at runtime
|
|
||||||
private Class<ROD> type;
|
|
||||||
|
|
||||||
private Object header = null; // Some RODs may use header
|
|
||||||
|
|
||||||
// field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
|
|
||||||
private String fieldDelimiter;
|
|
||||||
|
|
||||||
// constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
|
|
||||||
private Constructor<ROD> named_constructor;
|
|
||||||
|
|
||||||
// keep track of the lines we are reading. used for error messages only.
|
|
||||||
private long linenum = 0;
|
|
||||||
|
|
||||||
private boolean allow_empty = true;
|
|
||||||
private boolean allow_comments = true;
|
|
||||||
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
|
|
||||||
|
|
||||||
public RODRecordIterator(File file, String name, Class<ROD> type) {
|
|
||||||
try {
|
|
||||||
reader = new PushbackIterator<String>(new XReadLines(file));
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(file, e);
|
|
||||||
}
|
|
||||||
this.file = file;
|
|
||||||
this.name = name;
|
|
||||||
this.type = type;
|
|
||||||
try {
|
|
||||||
named_constructor = type.getConstructor(String.class);
|
|
||||||
}
|
|
||||||
catch (java.lang.NoSuchMethodException e) {
|
|
||||||
throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
|
|
||||||
}
|
|
||||||
ROD rod = instantiateROD(name);
|
|
||||||
fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
|
|
||||||
try {
|
|
||||||
header = rod.initialize(file);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns <tt>true</tt> if the iteration has more elements. (In other
|
|
||||||
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
|
|
||||||
* rather than throwing an exception.)
|
|
||||||
*
|
|
||||||
* @return <tt>true</tt> if the iterator has more elements.
|
|
||||||
*/
|
|
||||||
public boolean hasNext() {
|
|
||||||
if ( allow_empty || allow_comments ) {
|
|
||||||
while ( reader.hasNext() ) {
|
|
||||||
String line = reader.next();
|
|
||||||
if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
|
|
||||||
if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
|
|
||||||
// the line is not empty and not a comment line, so we have next after all
|
|
||||||
reader.pushback(line);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false; // oops, we end up here if there's nothing left
|
|
||||||
} else {
|
|
||||||
return reader.hasNext();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the next valid ROD record in the file, skipping empty and comment lines.
|
|
||||||
*
|
|
||||||
* @return the next element in the iteration.
|
|
||||||
* @throws java.util.NoSuchElementException
|
|
||||||
* iteration has no more elements.
|
|
||||||
*/
|
|
||||||
public ROD next() {
|
|
||||||
ROD n = null;
|
|
||||||
boolean parsed_ok = false;
|
|
||||||
String line ;
|
|
||||||
|
|
||||||
while ( ! parsed_ok && reader.hasNext() ) {
|
|
||||||
line = reader.next();
|
|
||||||
linenum++;
|
|
||||||
while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
|
|
||||||
allow_comments && line.charAt(0) == '#' ) {
|
|
||||||
if ( reader.hasNext() ) {
|
|
||||||
line = reader.next();
|
|
||||||
linenum++;
|
|
||||||
} else {
|
|
||||||
line = null;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
|
|
||||||
|
|
||||||
String parts[] = line.split(fieldDelimiter);
|
|
||||||
|
|
||||||
try {
|
|
||||||
n = instantiateROD(name);
|
|
||||||
parsed_ok = n.parseLine(header,parts) ;
|
|
||||||
}
|
|
||||||
catch ( Exception e ) {
|
|
||||||
throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
|
|
||||||
"\nOffending line: "+line+
|
|
||||||
"\nReason ("+e.getClass().getName()+")", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes from the underlying collection the last element returned by the
|
|
||||||
* iterator (optional operation). This method can be called only once per
|
|
||||||
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
|
|
||||||
* the underlying collection is modified while the iteration is in
|
|
||||||
* progress in any way other than by calling this method.
|
|
||||||
*
|
|
||||||
* @throws UnsupportedOperationException if the <tt>remove</tt>
|
|
||||||
* operation is not supported by this Iterator.
|
|
||||||
* @throws IllegalStateException if the <tt>next</tt> method has not
|
|
||||||
* yet been called, or the <tt>remove</tt> method has already
|
|
||||||
* been called after the last call to the <tt>next</tt>
|
|
||||||
* method.
|
|
||||||
*/
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name
|
|
||||||
* of the ROD track.
|
|
||||||
* @param name
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private ROD instantiateROD(final String name) {
|
|
||||||
try {
|
|
||||||
return (ROD) named_constructor.newInstance(name);
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,13 +1,15 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -18,348 +20,402 @@ import java.util.*;
|
||||||
* The standard interaction model is:
|
* The standard interaction model is:
|
||||||
*
|
*
|
||||||
* Traversal system arrives at a site, which has a bunch of RMDs covering it
|
* Traversal system arrives at a site, which has a bunch of RMDs covering it
|
||||||
Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs
|
* Traversal creates a tracker and passes it to the walker
|
||||||
* Traversal passes tracker to the walker
|
* walker calls get(rodBinding) to obtain the RMDs values at this site for the track
|
||||||
* walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was
|
* associated with rodBinding.
|
||||||
* bound at this site.
|
*
|
||||||
|
* Note that this is an immutable class. Once created the underlying data structures
|
||||||
|
* cannot be modified
|
||||||
*
|
*
|
||||||
* User: mdepristo
|
* User: mdepristo
|
||||||
* Date: Apr 3, 2009
|
* Date: Apr 3, 2009
|
||||||
* Time: 3:05:23 PM
|
* Time: 3:05:23 PM
|
||||||
*/
|
*/
|
||||||
public class RefMetaDataTracker {
|
public class RefMetaDataTracker {
|
||||||
|
// TODO: this should be a list, not a map, actually
|
||||||
|
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
|
||||||
|
|
||||||
final Map<String, RODRecordList> map;
|
final Map<String, RODRecordList> map;
|
||||||
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
final ReferenceContext ref;
|
||||||
|
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||||
|
|
||||||
public RefMetaDataTracker(int nBindings) {
|
// ------------------------------------------------------------------------------------------
|
||||||
if ( nBindings == 0 )
|
//
|
||||||
|
//
|
||||||
|
// Special ENGINE interaction functions
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
|
||||||
|
this.ref = ref;
|
||||||
|
|
||||||
|
// set up the map
|
||||||
|
if ( allBindings.isEmpty() )
|
||||||
map = Collections.emptyMap();
|
map = Collections.emptyMap();
|
||||||
else
|
else {
|
||||||
map = new HashMap<String, RODRecordList>(nBindings);
|
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
|
||||||
|
for ( RODRecordList rod : allBindings ) {
|
||||||
|
if ( rod != null && ! rod.isEmpty() )
|
||||||
|
tmap.put(canonicalName(rod.getName()), rod);
|
||||||
|
}
|
||||||
|
|
||||||
|
// ensure that no one modifies the map itself
|
||||||
|
map = Collections.unmodifiableMap(tmap);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Generic accessors
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Gets all of the Tribble features spanning this locus, returning them as a list of specific
|
||||||
|
* type T extending Feature. This function looks across all tracks to find the Features, so
|
||||||
|
* if you have two tracks A and B each containing 1 Feature, then getValues will return
|
||||||
|
* a list containing both features.
|
||||||
|
*
|
||||||
|
* Note that this function assumes that all of the bound features are instances of or
|
||||||
|
* subclasses of T. A ClassCastException will occur if this isn't the case. If you want
|
||||||
|
* to get all Features without any danger of such an exception use the root Tribble
|
||||||
|
* interface Feature.
|
||||||
|
*
|
||||||
|
* @param type The type of the underlying objects bound here
|
||||||
|
* @param <T> as above
|
||||||
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"type != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public <T extends Feature> List<T> getValues(final Class<T> type) {
|
||||||
|
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get all the reference meta data associated with a track name.
|
* Provides the same functionality as @link #getValues(Class<T>) but will only include
|
||||||
* @param name the name of the track we're looking for
|
* Features that start at the GenomeLoc provided by onlyAtThisLoc.
|
||||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
|
||||||
* dbSNP RMD this will be a RodDbSNP, etc.
|
|
||||||
*
|
*
|
||||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
* @param type The type of the underlying objects bound here
|
||||||
|
* @param onlyAtThisLoc
|
||||||
|
* @param <T> as above
|
||||||
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
*/
|
*/
|
||||||
public List<Object> getReferenceMetaData(final String name) {
|
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||||
RODRecordList list = getTrackDataByName(name, true);
|
@Ensures("result != null")
|
||||||
List<Object> objects = new ArrayList<Object>();
|
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||||
if (list == null) return objects;
|
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||||
for (GATKFeature feature : list)
|
|
||||||
objects.add(feature.getUnderlyingObject());
|
|
||||||
return objects;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get all the reference meta data associated with a track name.
|
* Uses the same logic as @link #getValues(Class) but arbitrarily selects one of the resulting
|
||||||
* @param name the name of the track we're looking for
|
* elements of the list to return. That is, if there would be two elements in the result of
|
||||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
* @link #getValues(Class), one of these two is selected, and which one it will be isn't
|
||||||
* the passed in parameter (false).
|
* specified. Consequently, this method is only really safe if (1) you absolutely know
|
||||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
* that only one binding will meet the constraints of @link #getValues(Class) or (2)
|
||||||
* dbSNP rod this will be a RodDbSNP, etc.
|
* you truly don't care which of the multiple bindings available you are going to examine.
|
||||||
*
|
*
|
||||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
* If there are no bindings here, getFirstValue() returns null
|
||||||
|
*
|
||||||
|
* @param type The type of the underlying objects bound here
|
||||||
|
* @param <T> as above
|
||||||
|
* @return A random single element the RODs bound here, or null if none are bound.
|
||||||
*/
|
*/
|
||||||
public List<Object> getReferenceMetaData(final String name, boolean requireExactMatch) {
|
@Requires({"type != null"})
|
||||||
RODRecordList list = getTrackDataByName(name, requireExactMatch);
|
public <T extends Feature> T getFirstValue(final Class<T> type) {
|
||||||
List<Object> objects = new ArrayList<Object>();
|
return safeGetFirst(getValues(type));
|
||||||
if (list == null) return objects;
|
|
||||||
for (GATKFeature feature : list)
|
|
||||||
objects.add(feature.getUnderlyingObject());
|
|
||||||
return objects;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get all the GATK features associated with a specific track name
|
* Uses the same logic as @link #getValues(Class,GenomeLoc) to determine the list
|
||||||
* @param name the name of the track we're looking for
|
* of eligible Features and @link #getFirstValue(Class) to select a single
|
||||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
* element from the interval list.
|
||||||
* the passed in parameter (false).
|
|
||||||
* @return a list of GATKFeatures for the target rmd
|
|
||||||
*
|
*
|
||||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
* @param type The type of the underlying objects bound here
|
||||||
|
* @param <T> as above
|
||||||
|
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||||
|
* @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
|
||||||
*/
|
*/
|
||||||
public List<GATKFeature> getGATKFeatureMetaData(final String name, boolean requireExactMatch) {
|
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||||
List<GATKFeature> feat = getTrackDataByName(name,requireExactMatch);
|
public <T extends Feature> T getFirstValue(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||||
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null
|
return safeGetFirst(getValues(type, onlyAtThisLoc));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* get a singleton record, given the name and a type. This function will return the first record at the current position seen,
|
* Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
|
||||||
* and emit a logger warning if there were more than one option.
|
* a list of specific type T extending Feature.
|
||||||
*
|
*
|
||||||
* WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are
|
* Note that this function assumes that all of the bound features are instances of or
|
||||||
* are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
|
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||||
* picked may change from time to time! BE WARNED!
|
*
|
||||||
*
|
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||||
* @param name the name of the track
|
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||||
* @param clazz the underlying type to return
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
* @param <T> the type to parameterize on, matching the clazz argument
|
|
||||||
* @return a record of type T, or null if no record is present.
|
|
||||||
*/
|
*/
|
||||||
@Deprecated
|
@Requires({"rodBinding != null"})
|
||||||
public <T> T lookup(final String name, Class<T> clazz) {
|
@Ensures("result != null")
|
||||||
RODRecordList objects = getTrackDataByName(name, true);
|
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding) {
|
||||||
|
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), null, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
// if emtpy or null return null;
|
/**
|
||||||
if (objects == null || objects.size() < 1) return null;
|
* Gets all of the Tribble features bound to any RodBinding in rodBindings,
|
||||||
|
* spanning this locus, returning them as a list of specific type T extending Feature.
|
||||||
|
*
|
||||||
|
* Note that this function assumes that all of the bound features are instances of or
|
||||||
|
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||||
|
*
|
||||||
|
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||||
|
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||||
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBindings != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings) {
|
||||||
|
List<T> results = new ArrayList<T>(1);
|
||||||
|
for ( RodBinding<T> rodBinding : rodBindings )
|
||||||
|
results.addAll(getValues(rodBinding));
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
if (objects.size() > 1)
|
/**
|
||||||
logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options");
|
* The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc
|
||||||
|
*
|
||||||
|
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||||
|
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||||
|
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||||
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
|
||||||
|
}
|
||||||
|
|
||||||
Object obj = objects.get(0).getUnderlyingObject();
|
/**
|
||||||
if (!(clazz.isAssignableFrom(obj.getClass())))
|
* The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc
|
||||||
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
|
*
|
||||||
+ " it's of type " + obj.getClass());
|
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||||
|
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||||
|
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||||
|
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
List<T> results = new ArrayList<T>(1);
|
||||||
|
for ( RodBinding<T> rodBinding : rodBindings )
|
||||||
|
results.addAll(getValues(rodBinding, onlyAtThisLoc));
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
return (T)obj;
|
/**
|
||||||
|
* Uses the same logic as @getValues(RodBinding) to determine the list
|
||||||
|
* of eligible Features and select a single element from the resulting set
|
||||||
|
* of eligible features.
|
||||||
|
*
|
||||||
|
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||||
|
* @param <T> as above
|
||||||
|
* @return A random single element the eligible Features found, or null if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBinding != null"})
|
||||||
|
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding) {
|
||||||
|
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list
|
||||||
|
* of eligible Features and select a single element from the resulting set
|
||||||
|
* of eligible features.
|
||||||
|
*
|
||||||
|
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||||
|
* @param <T> as above
|
||||||
|
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||||
|
* @return A random single element the eligible Features found, or null if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||||
|
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the same logic as @getValues(List) to determine the list
|
||||||
|
* of eligible Features and select a single element from the resulting set
|
||||||
|
* of eligible features.
|
||||||
|
*
|
||||||
|
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||||
|
* @param <T> as above
|
||||||
|
* @return A random single element the eligible Features found, or null if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBindings != null"})
|
||||||
|
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings) {
|
||||||
|
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||||
|
T val = getFirstValue(rodBinding);
|
||||||
|
if ( val != null )
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list
|
||||||
|
* of eligible Features and select a single element from the resulting set
|
||||||
|
* of eligible features.
|
||||||
|
*
|
||||||
|
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||||
|
* @param <T> as above
|
||||||
|
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||||
|
* @return A random single element the eligible Features found, or null if none are bound.
|
||||||
|
*/
|
||||||
|
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||||
|
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||||
|
T val = getFirstValue(rodBinding, onlyAtThisLoc);
|
||||||
|
if ( val != null )
|
||||||
|
return val;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Is there a binding at this site to a ROD/track with the specified name?
|
* Is there a binding at this site to a ROD/track with the specified name?
|
||||||
*
|
*
|
||||||
* @param name the name of the rod
|
* @param rodBinding the rod binding we want to know about
|
||||||
* @return true if it has the rod
|
* @return true if any Features are bound in this tracker to rodBinding
|
||||||
*/
|
*/
|
||||||
public boolean hasROD(final String name) {
|
@Requires({"rodBinding != null"})
|
||||||
return map.containsKey(canonicalName(name));
|
public boolean hasValues(final RodBinding rodBinding) {
|
||||||
}
|
return map.containsKey(canonicalName(rodBinding.getName()));
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
|
|
||||||
* at the current site, they all will be added to the list as separate elements.
|
|
||||||
*
|
|
||||||
* @return collection of all rods
|
|
||||||
*/
|
|
||||||
public Collection<GATKFeature> getAllRods() {
|
|
||||||
List<GATKFeature> l = new ArrayList<GATKFeature>();
|
|
||||||
for ( RODRecordList rl : map.values() ) {
|
|
||||||
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
|
|
||||||
l.addAll(rl);
|
|
||||||
}
|
|
||||||
return l;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
|
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
|
||||||
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
|
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
|
||||||
*
|
*
|
||||||
* @return collection of all tracks
|
* @return List of all tracks
|
||||||
*/
|
*/
|
||||||
public Collection<RODRecordList> getBoundRodTracks() {
|
public List<RODRecordList> getBoundRodTracks() {
|
||||||
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
|
return new ArrayList<RODRecordList>(map.values());
|
||||||
|
|
||||||
for ( RODRecordList value : map.values() ) {
|
|
||||||
if ( value != null && value.size() != 0 ) bound.add(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
return bound;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker
|
* The number of tracks with at least one value bound here
|
||||||
|
* @return the number of tracks with at least one bound Feature
|
||||||
*/
|
*/
|
||||||
public int getNBoundRodTracks() {
|
public int getNTracksWithBoundFeatures() {
|
||||||
return getNBoundRodTracks(null);
|
return map.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getNBoundRodTracks(final String excludeIn ) {
|
// ------------------------------------------------------------------------------------------
|
||||||
final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
|
//
|
||||||
|
//
|
||||||
|
// old style accessors
|
||||||
|
//
|
||||||
|
// TODO -- DELETE ME
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
int n = 0;
|
@Deprecated
|
||||||
for ( RODRecordList value : map.values() ) {
|
public boolean hasValues(final String name) {
|
||||||
if ( value != null && ! value.isEmpty() ) {
|
return map.containsKey(canonicalName(name));
|
||||||
if ( exclude == null || ! value.getName().equals(exclude) )
|
|
||||||
n++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return n;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Deprecated
|
||||||
|
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
|
||||||
|
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
|
||||||
|
}
|
||||||
|
@Deprecated
|
||||||
|
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||||
|
}
|
||||||
|
@Deprecated
|
||||||
|
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||||
|
return safeGetFirst(getValues(type, name));
|
||||||
|
}
|
||||||
|
@Deprecated
|
||||||
|
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||||
|
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
|
||||||
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Private utility functions
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// ------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal
|
* Helper function for getFirst() operations that takes a list of <T> and
|
||||||
* system to provide access to RMDs in a structured way to the walkers.
|
* returns the first element, or null if no such element exists.
|
||||||
*
|
*
|
||||||
* @param name the name of the track
|
* @param l
|
||||||
* @param rod the collection of RMD data
|
* @param <T>
|
||||||
*/
|
|
||||||
public void bind(final String name, RODRecordList rod) {
|
|
||||||
//logger.debug(String.format("Binding %s to %s", name, rod));
|
|
||||||
map.put(canonicalName(name), rod);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
|
|
||||||
* of entries per ROD.
|
|
||||||
* The name of each VariantContext corresponds to the ROD name.
|
|
||||||
*
|
|
||||||
* @param ref reference context
|
|
||||||
* @return variant context
|
|
||||||
*/
|
|
||||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
|
|
||||||
return getAllVariantContexts(ref, null, null, false, false);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns all of the variant contexts that start at the current location
|
|
||||||
* @param ref
|
|
||||||
* @param curLocation
|
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) {
|
@Requires({"l != null"})
|
||||||
return getAllVariantContexts(ref, null, curLocation, true, false);
|
final private <T extends Feature> T safeGetFirst(final List<T> l) {
|
||||||
|
return l.isEmpty() ? null : l.get(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
private <T extends Feature> List<T> addValues(final Collection<String> names,
|
||||||
* Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only
|
final Class<T> type,
|
||||||
* VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation
|
List<T> values,
|
||||||
* must not be null, and only records whose start position is == to curLocation.getStart() will be returned.
|
final GenomeLoc curLocation,
|
||||||
* If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course,
|
final boolean requireStartHere,
|
||||||
* this single object must pass the allowed types and start here options if provided. Note that the result
|
final boolean takeFirstOnly ) {
|
||||||
* may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
|
|
||||||
* the current location.
|
|
||||||
*
|
|
||||||
* The name of each VariantContext corresponds to the ROD name.
|
|
||||||
*
|
|
||||||
* @param ref reference context
|
|
||||||
* @param allowedTypes allowed types
|
|
||||||
* @param curLocation location
|
|
||||||
* @param requireStartHere do we require the rod to start at this location?
|
|
||||||
* @param takeFirstOnly do we take the first rod only?
|
|
||||||
* @return variant context
|
|
||||||
*/
|
|
||||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
List<VariantContext> contexts = new ArrayList<VariantContext>();
|
|
||||||
|
|
||||||
for ( RODRecordList rodList : getBoundRodTracks() ) {
|
|
||||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
|
||||||
}
|
|
||||||
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the variant contexts associated with track name name
|
|
||||||
*
|
|
||||||
* see getVariantContexts for more information.
|
|
||||||
*
|
|
||||||
* @param ref ReferenceContext to enable conversion to variant context
|
|
||||||
* @param name name
|
|
||||||
* @param curLocation location
|
|
||||||
* @param allowedTypes allowed types
|
|
||||||
* @param requireStartHere do we require the rod to start at this location?
|
|
||||||
* @param takeFirstOnly do we take the first rod only?
|
|
||||||
* @return variant context
|
|
||||||
*/
|
|
||||||
// public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
|
||||||
}
|
|
||||||
|
|
||||||
// public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
|
||||||
// }
|
|
||||||
|
|
||||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
|
||||||
|
|
||||||
for ( String name : names ) {
|
for ( String name : names ) {
|
||||||
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
|
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
|
||||||
|
values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||||
if ( rodList != null )
|
if ( takeFirstOnly && ! values.isEmpty() )
|
||||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return contexts;
|
return values;
|
||||||
}
|
|
||||||
|
|
||||||
public Collection<VariantContext> getVariantContextsByPrefix(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
|
||||||
|
|
||||||
for ( String name : names ) {
|
|
||||||
RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match
|
|
||||||
|
|
||||||
if ( rodList != null )
|
|
||||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
|
||||||
}
|
|
||||||
|
|
||||||
return contexts;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not.
|
|
||||||
* see getVariantContexts for more information.
|
|
||||||
*
|
|
||||||
* @param name name
|
|
||||||
* @param curLocation location
|
|
||||||
* @param allowedTypes allowed types
|
|
||||||
* @param requireStartHere do we require the rod to start at this location?
|
|
||||||
* @return variant context
|
|
||||||
*/
|
|
||||||
public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
|
|
||||||
Collection<VariantContext> contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
|
|
||||||
|
|
||||||
if ( contexts.size() > 1 )
|
|
||||||
throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
|
|
||||||
else if ( contexts.size() == 0 )
|
|
||||||
return null;
|
|
||||||
else
|
|
||||||
return contexts.iterator().next();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
|
|
||||||
* null if there's no binding here.
|
|
||||||
*
|
|
||||||
* @param ref
|
|
||||||
* @param name
|
|
||||||
* @param curLocation
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
|
|
||||||
return getVariantContext(ref, name, null, curLocation, true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
|
||||||
|
private <T extends Feature> List<T> addValues(final String name,
|
||||||
|
final Class<T> type,
|
||||||
|
List<T> values,
|
||||||
|
final RODRecordList rodList,
|
||||||
|
final GenomeLoc curLocation,
|
||||||
|
final boolean requireStartHere,
|
||||||
|
final boolean takeFirstOnly ) {
|
||||||
for ( GATKFeature rec : rodList ) {
|
for ( GATKFeature rec : rodList ) {
|
||||||
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
|
if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
|
||||||
// ok, we might actually be able to turn this record in a variant context
|
Object obj = rec.getUnderlyingObject();
|
||||||
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
|
if (!(type.isAssignableFrom(obj.getClass())))
|
||||||
|
throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
|
||||||
|
+ " it's of type " + obj.getClass());
|
||||||
|
|
||||||
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
|
T objT = (T)obj;
|
||||||
continue;
|
if ( takeFirstOnly ) {
|
||||||
|
if ( values == null )
|
||||||
|
values = Arrays.asList(objT);
|
||||||
|
else
|
||||||
|
values.add(objT);
|
||||||
|
|
||||||
// now, let's decide if we want to keep it
|
break;
|
||||||
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
|
} else {
|
||||||
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
|
if ( values == null )
|
||||||
|
values = new ArrayList<T>();
|
||||||
if ( goodType && goodPos ) { // ok, we are going to keep this thing
|
values.add(objT);
|
||||||
contexts.add(vc);
|
|
||||||
|
|
||||||
if ( takeFirstOnly )
|
|
||||||
// we only want the first passing instance, so break the loop over records in rodList
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return values == null ? Collections.<T>emptyList() : values;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
|
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
|
||||||
* with the current site as a RODRecordList collection object. If no data track with specified name is available,
|
* with the current site as a RODRecordList List object. If no data track with specified name is available,
|
||||||
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
|
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
|
||||||
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
|
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
|
||||||
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
|
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
|
||||||
|
|
@ -367,29 +423,16 @@ public class RefMetaDataTracker {
|
||||||
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
|
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
|
||||||
* regardless of the presence of "extended" RODs overlapping with that location).
|
* regardless of the presence of "extended" RODs overlapping with that location).
|
||||||
* @param name track name
|
* @param name track name
|
||||||
* @param requireExactMatch do we require an exact match of the rod name?
|
|
||||||
* @return track data for the given rod
|
* @return track data for the given rod
|
||||||
*/
|
*/
|
||||||
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
|
private RODRecordList getTrackDataByName(final String name) {
|
||||||
//logger.debug(String.format("Lookup %s%n", name));
|
|
||||||
|
|
||||||
final String luName = canonicalName(name);
|
final String luName = canonicalName(name);
|
||||||
RODRecordList trackData = null;
|
RODRecordList l = map.get(luName);
|
||||||
|
return l == null ? EMPTY_ROD_RECORD_LIST : l;
|
||||||
|
}
|
||||||
|
|
||||||
if ( requireExactMatch ) {
|
private RODRecordList getTrackDataByName(final RodBinding binding) {
|
||||||
if ( map.containsKey(luName) )
|
return getTrackDataByName(binding.getName());
|
||||||
trackData = map.get(luName);
|
|
||||||
} else {
|
|
||||||
for ( Map.Entry<String, RODRecordList> datum : map.entrySet() ) {
|
|
||||||
final String rodName = datum.getKey();
|
|
||||||
if ( datum.getValue() != null && rodName.startsWith(luName) ) {
|
|
||||||
if ( trackData == null ) trackData = new RODRecordListImpl(name);
|
|
||||||
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
|
|
||||||
((RODRecordListImpl)trackData).add(datum.getValue(), true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return trackData;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -398,6 +441,7 @@ public class RefMetaDataTracker {
|
||||||
* @return canonical name of the rod
|
* @return canonical name of the rod
|
||||||
*/
|
*/
|
||||||
private final String canonicalName(final String name) {
|
private final String canonicalName(final String name) {
|
||||||
|
// todo -- remove me after switch to RodBinding syntax
|
||||||
return name.toLowerCase();
|
return name.toLowerCase();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,130 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
|
||||||
|
|
||||||
import java.io.*;
|
|
||||||
import java.lang.reflect.Method;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Iterator;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class for representing arbitrary reference ordered data sets
|
|
||||||
* <p/>
|
|
||||||
* User: mdepristo
|
|
||||||
* Date: Feb 27, 2009
|
|
||||||
* Time: 10:47:14 AM
|
|
||||||
* To change this template use File | Settings | File Templates.
|
|
||||||
*/
|
|
||||||
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ReferenceOrderedDatum> {
|
|
||||||
private String name;
|
|
||||||
private File file = null;
|
|
||||||
// private String fieldDelimiter;
|
|
||||||
|
|
||||||
/** Header object returned from the datum */
|
|
||||||
// private Object header = null;
|
|
||||||
|
|
||||||
private Class<ROD> type = null; // runtime type information for object construction
|
|
||||||
|
|
||||||
/** our log, which we want to capture anything from this class */
|
|
||||||
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
|
|
||||||
|
|
||||||
/**
|
|
||||||
* given an existing file, open it and append all the valid triplet lines to an existing list
|
|
||||||
*
|
|
||||||
* @param rodTripletList the list of existing triplets
|
|
||||||
* @param filename the file to attempt to extract ROD triplets from
|
|
||||||
*/
|
|
||||||
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
|
|
||||||
BufferedReader str;
|
|
||||||
try {
|
|
||||||
str = new BufferedReader(new FileReader(new File(filename)));
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e);
|
|
||||||
}
|
|
||||||
String line = "NO LINES READ IN";
|
|
||||||
try {
|
|
||||||
while ((line = str.readLine()) != null) {
|
|
||||||
if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
|
|
||||||
else logger.warn("the following file line didn't parsing into a triplet -> " + line);
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Constructors
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
|
|
||||||
this.name = name;
|
|
||||||
this.file = file;
|
|
||||||
this.type = type;
|
|
||||||
// this.header = initializeROD(name, file, type);
|
|
||||||
// this.fieldDelimiter = newROD(name, type).delimiterRegex();
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getName() { return name; }
|
|
||||||
|
|
||||||
public File getFile() { return file; }
|
|
||||||
|
|
||||||
public Class<ROD> getType() { return type; }
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Special equals override to see if this ROD is compatible with the given
|
|
||||||
* name and type. 'Compatible' means that this ROD has the name that's passed
|
|
||||||
* in and its data can fit into the container specified by type.
|
|
||||||
*
|
|
||||||
* @param name Name to check.
|
|
||||||
* @param type Type to check.
|
|
||||||
*
|
|
||||||
* @return True if these parameters imply this rod. False otherwise.
|
|
||||||
*/
|
|
||||||
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
|
|
||||||
return this.name.equals(name) && type.isAssignableFrom(this.type);
|
|
||||||
}
|
|
||||||
|
|
||||||
public Iterator<ReferenceOrderedDatum> iterator() {
|
|
||||||
Iterator<ReferenceOrderedDatum> it;
|
|
||||||
try {
|
|
||||||
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
|
|
||||||
it = (Iterator<ReferenceOrderedDatum>) m.invoke(null, name, file);
|
|
||||||
} catch (java.lang.NoSuchMethodException e) {
|
|
||||||
it = new RODRecordIterator(file,name,type);
|
|
||||||
} catch (java.lang.NullPointerException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (java.lang.SecurityException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (java.lang.IllegalAccessException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (java.lang.IllegalArgumentException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
} catch (java.lang.reflect.InvocationTargetException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
// return new RODIterator<ROD>(it);
|
|
||||||
return it;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Manipulations of all of the data
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
|
|
||||||
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
|
|
||||||
final FileWriter out = new FileWriter(output);
|
|
||||||
|
|
||||||
for (ReferenceOrderedDatum rec : data) {
|
|
||||||
out.write(rec.repl() + "\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
out.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -0,0 +1,48 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
|
||||||
|
/**
 * Marker interface for a Tribble codec that can look at a file and determine for itself
 * whether it is the codec that parses the contents of that file.
 */
public interface SelfScopingFeatureCodec {
    /**
     * Tells whether the File potentialInput can be parsed by this codec.
     *
     * The GATK assumes that two SelfScopingFeatureCodecs never return true for the same
     * file; if that does occur, the GATK reports an error.
     *
     * Implementations must never throw from this method: every error should be trapped
     * and false returned instead.
     *
     * @param potentialInput the file to test for parsability with this codec
     * @return true if potentialInput can be parsed, false otherwise
     */
    boolean canDecode(final File potentialInput);
}
|
||||||
|
|
@ -1,12 +1,13 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata;
|
package org.broadinstitute.sting.gatk.refdata;
|
||||||
|
|
||||||
|
import net.sf.samtools.util.SequenceUtil;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
import org.broad.tribble.annotation.Strand;
|
||||||
|
import org.broad.tribble.dbsnp.OldDbSNPFeature;
|
||||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
|
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
@ -92,28 +93,89 @@ public class VariantContextAdaptors {
|
||||||
// --------------------------------------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------------------------------------
|
||||||
|
|
||||||
private static class DBSnpAdaptor implements VCAdaptor {
|
private static class DBSnpAdaptor implements VCAdaptor {
|
||||||
|
private static boolean isSNP(OldDbSNPFeature feature) {
|
||||||
|
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isMNP(OldDbSNPFeature feature) {
|
||||||
|
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isInsertion(OldDbSNPFeature feature) {
|
||||||
|
return feature.getVariantType().contains("insertion");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isDeletion(OldDbSNPFeature feature) {
|
||||||
|
return feature.getVariantType().contains("deletion");
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isIndel(OldDbSNPFeature feature) {
|
||||||
|
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static boolean isComplexIndel(OldDbSNPFeature feature) {
|
||||||
|
return feature.getVariantType().contains("in-del");
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||||
|
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||||
|
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||||
|
* frequency).
|
||||||
|
*
|
||||||
|
* @return an alternate allele list
|
||||||
|
*/
|
||||||
|
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
|
||||||
|
List<String> ret = new ArrayList<String>();
|
||||||
|
for (String allele : getAlleleList(feature))
|
||||||
|
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* gets the alleles. This method should return all the alleles present at the location,
|
||||||
|
* including the reference base. The first allele should always be the reference allele, followed
|
||||||
|
* by an unordered list of alternate alleles.
|
||||||
|
*
|
||||||
|
* @return an alternate allele list
|
||||||
|
*/
|
||||||
|
public static List<String> getAlleleList(OldDbSNPFeature feature) {
|
||||||
|
List<String> alleleList = new ArrayList<String>();
|
||||||
|
// add ref first
|
||||||
|
if ( feature.getStrand() == Strand.POSITIVE )
|
||||||
|
alleleList = Arrays.asList(feature.getObserved());
|
||||||
|
else
|
||||||
|
for (String str : feature.getObserved())
|
||||||
|
alleleList.add(SequenceUtil.reverseComplement(str));
|
||||||
|
if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
|
||||||
|
&& !alleleList.get(0).equals(feature.getNCBIRefBase()) )
|
||||||
|
Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
|
||||||
|
|
||||||
|
return alleleList;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts non-VCF formatted dbSNP records to VariantContext.
|
* Converts non-VCF formatted dbSNP records to VariantContext.
|
||||||
* @return DbSNPFeature.
|
* @return OldDbSNPFeature.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; }
|
public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
|
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||||
return null;
|
return null;
|
||||||
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
|
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
||||||
|
|
||||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || DbSNPHelper.isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||||
// add the reference allele
|
// add the reference allele
|
||||||
List<Allele> alleles = new ArrayList<Allele>();
|
List<Allele> alleles = new ArrayList<Allele>();
|
||||||
alleles.add(refAllele);
|
alleles.add(refAllele);
|
||||||
|
|
||||||
// add all of the alt alleles
|
// add all of the alt alleles
|
||||||
boolean sawNullAllele = false;
|
boolean sawNullAllele = refAllele.isNull();
|
||||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -127,14 +189,13 @@ public class VariantContextAdaptors {
|
||||||
Map<String, Object> attributes = new HashMap<String, Object>();
|
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||||
|
|
||||||
if ( sawNullAllele ) {
|
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
if ( index < 0 )
|
||||||
if ( index < 0 )
|
return null; // we weren't given enough reference context to create the VariantContext
|
||||||
return null; // we weren't given enough reference context to create the VariantContext
|
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
|
|
||||||
}
|
Map<String, Genotype> genotypes = null;
|
||||||
Collection<Genotype> genotypes = null;
|
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
||||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
|
||||||
return vc;
|
return vc;
|
||||||
} else
|
} else
|
||||||
return null; // can't handle anything else
|
return null; // can't handle anything else
|
||||||
|
|
@ -164,16 +225,6 @@ public class VariantContextAdaptors {
|
||||||
@Override
|
@Override
|
||||||
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
||||||
|
|
||||||
/**
|
|
||||||
* convert to a Variant Context, given:
|
|
||||||
* @param name the name of the ROD
|
|
||||||
* @param input the Rod object, in this case a RodGeliText
|
|
||||||
* @return a VariantContext object
|
|
||||||
*/
|
|
||||||
// VariantContext convert(String name, Object input) {
|
|
||||||
// return convert(name, input, null);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* convert to a Variant Context, given:
|
* convert to a Variant Context, given:
|
||||||
* @param name the name of the ROD
|
* @param name the name of the ROD
|
||||||
|
|
@ -237,17 +288,7 @@ public class VariantContextAdaptors {
|
||||||
* @return HapMapFeature.
|
* @return HapMapFeature.
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
|
public Class<? extends Feature> getAdaptableFeatureType() { return RawHapMapFeature.class; }
|
||||||
|
|
||||||
/**
|
|
||||||
* convert to a Variant Context, given:
|
|
||||||
* @param name the name of the ROD
|
|
||||||
* @param input the Rod object, in this case a RodGeliText
|
|
||||||
* @return a VariantContext object
|
|
||||||
*/
|
|
||||||
// VariantContext convert(String name, Object input) {
|
|
||||||
// return convert(name, input, null);
|
|
||||||
// }
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* convert to a Variant Context, given:
|
* convert to a Variant Context, given:
|
||||||
|
|
@ -261,7 +302,12 @@ public class VariantContextAdaptors {
|
||||||
if ( ref == null )
|
if ( ref == null )
|
||||||
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
|
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
|
||||||
|
|
||||||
HapMapFeature hapmap = (HapMapFeature)input;
|
RawHapMapFeature hapmap = (RawHapMapFeature)input;
|
||||||
|
|
||||||
|
int index = hapmap.getStart() - ref.getWindow().getStart();
|
||||||
|
if ( index < 0 )
|
||||||
|
return null; // we weren't given enough reference context to create the VariantContext
|
||||||
|
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||||
|
|
||||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||||
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
||||||
|
|
@ -271,7 +317,7 @@ public class VariantContextAdaptors {
|
||||||
// use the actual alleles, if available
|
// use the actual alleles, if available
|
||||||
if ( alleleMap != null ) {
|
if ( alleleMap != null ) {
|
||||||
alleles.addAll(alleleMap.values());
|
alleles.addAll(alleleMap.values());
|
||||||
Allele deletionAllele = alleleMap.get(HapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
|
Allele deletionAllele = alleleMap.get(RawHapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
|
||||||
if ( deletionAllele != null && deletionAllele.isReference() )
|
if ( deletionAllele != null && deletionAllele.isReference() )
|
||||||
deletionLength = deletionAllele.length();
|
deletionLength = deletionAllele.length();
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -321,7 +367,7 @@ public class VariantContextAdaptors {
|
||||||
long end = hapmap.getEnd();
|
long end = hapmap.getEnd();
|
||||||
if ( deletionLength > 0 )
|
if ( deletionLength > 0 )
|
||||||
end += deletionLength;
|
end += deletionLength;
|
||||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
|
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
|
||||||
return vc;
|
return vc;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,193 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broad.tribble.Feature;
|
|
||||||
import org.broad.tribble.exception.CodecLineParsingException;
|
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
|
||||||
import org.broad.tribble.readers.LineReader;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.StringTokenizer;
|
|
||||||
|
|
||||||
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
|
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
|
||||||
|
|
||||||
public static final String DELIMITER = "\t";
|
|
||||||
|
|
||||||
private ArrayList<String> header;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* The parser to use when resolving genome-wide locations.
|
|
||||||
*/
|
|
||||||
private GenomeLocParser genomeLocParser;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set the parser to use when resolving genetic data.
|
|
||||||
* @param genomeLocParser The supplied parser.
|
|
||||||
*/
|
|
||||||
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
|
||||||
this.genomeLocParser = genomeLocParser;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the header.
|
|
||||||
*
|
|
||||||
* @param reader
|
|
||||||
*
|
|
||||||
* @return The # of header lines for this file.
|
|
||||||
*/
|
|
||||||
public Object readHeader(LineReader reader)
|
|
||||||
{
|
|
||||||
int[] lineCounter = new int[1];
|
|
||||||
try {
|
|
||||||
header = readHeader(reader, lineCounter);
|
|
||||||
} catch(IOException e) {
|
|
||||||
throw new IllegalArgumentException("Unable to read from file.", e);
|
|
||||||
}
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Class<AnnotatorInputTableFeature> getFeatureType() {
|
|
||||||
return AnnotatorInputTableFeature.class;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Feature decodeLoc(String line) {
|
|
||||||
StringTokenizer st = new StringTokenizer(line, DELIMITER);
|
|
||||||
if ( st.countTokens() < 1 )
|
|
||||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
|
||||||
|
|
||||||
GenomeLoc loc;
|
|
||||||
String chr = st.nextToken();
|
|
||||||
if ( chr.indexOf(":") != -1 ) {
|
|
||||||
loc = genomeLocParser.parseGenomeLoc(chr);
|
|
||||||
} else {
|
|
||||||
if ( st.countTokens() < 3 )
|
|
||||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
|
||||||
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
|
||||||
}
|
|
||||||
return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the line into an AnnotatorInputTableFeature object.
|
|
||||||
*
|
|
||||||
* @param line
|
|
||||||
*/
|
|
||||||
public AnnotatorInputTableFeature decode(String line) {
|
|
||||||
final ArrayList<String> header = this.header; //optimization
|
|
||||||
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
|
|
||||||
|
|
||||||
if ( values.size() != header.size()) {
|
|
||||||
throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
|
|
||||||
}
|
|
||||||
|
|
||||||
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
|
|
||||||
for ( int i = 0; i < header.size(); i++ ) {
|
|
||||||
feature.putColumnValue(header.get(i), values.get(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
GenomeLoc loc;
|
|
||||||
if ( values.get(0).indexOf(":") != -1 )
|
|
||||||
loc = genomeLocParser.parseGenomeLoc(values.get(0));
|
|
||||||
else
|
|
||||||
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
|
||||||
|
|
||||||
//parse the location
|
|
||||||
feature.setChr(loc.getContig());
|
|
||||||
feature.setStart((int)loc.getStart());
|
|
||||||
feature.setEnd((int)loc.getStop());
|
|
||||||
|
|
||||||
return feature;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the header.
|
|
||||||
* @param source
|
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public static ArrayList<String> readHeader(final File source) throws IOException {
|
|
||||||
FileInputStream is = new FileInputStream(source);
|
|
||||||
try {
|
|
||||||
return readHeader(new AsciiLineReader(is), null);
|
|
||||||
} finally {
|
|
||||||
is.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the header, and also sets the 2nd arg to the number of lines in the header.
|
|
||||||
* @param source
|
|
||||||
* @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
|
|
||||||
* @return The header fields.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
|
|
||||||
|
|
||||||
ArrayList<String> header = null;
|
|
||||||
int numLines = 0;
|
|
||||||
|
|
||||||
//find the 1st line that's non-empty and not a comment
|
|
||||||
String line = null;
|
|
||||||
while( (line = source.readLine()) != null ) {
|
|
||||||
numLines++;
|
|
||||||
if ( line.trim().isEmpty() || line.startsWith("#") ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//parse the header
|
|
||||||
header = Utils.split(line, DELIMITER);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// check that we found the header
|
|
||||||
if ( header == null ) {
|
|
||||||
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if(lineCounter != null) {
|
|
||||||
lineCounter[0] = numLines;
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
|
|
||||||
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,158 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010 The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This class represents a single record in an AnnotatorInputTable.
|
|
||||||
*/
|
|
||||||
public class AnnotatorInputTableFeature implements Feature {
|
|
||||||
|
|
||||||
private ArrayList<String> columnNames;
|
|
||||||
private HashMap<String, String> columnValues; //maps colum names to column values
|
|
||||||
|
|
||||||
private String chr;
|
|
||||||
private int start;
|
|
||||||
private int end;
|
|
||||||
private String strRep = null;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
* @param chr The chromosome name.
|
|
||||||
* @param start The start position
|
|
||||||
* @param end The end position
|
|
||||||
*/
|
|
||||||
public AnnotatorInputTableFeature(String chr, int start, int end) {
|
|
||||||
this.chr = chr;
|
|
||||||
this.start = start;
|
|
||||||
this.end = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
* @param columnNames The column names as parsed out of the file header.
|
|
||||||
*/
|
|
||||||
public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
|
|
||||||
this.columnNames = columnNames;
|
|
||||||
this.columnValues = new HashMap<String, String>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return the list of column names from the file header.
|
|
||||||
*/
|
|
||||||
public ArrayList<String> getHeader() {
|
|
||||||
return columnNames;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the value of the given column.
|
|
||||||
*
|
|
||||||
* @param columnName The column name as it appears in the file header.
|
|
||||||
* @return The value
|
|
||||||
*/
|
|
||||||
public String getColumnValue(final String columnName) {
|
|
||||||
return columnValues.get(columnName);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean containsColumnName(final String columnName) {
|
|
||||||
return columnValues.containsKey(columnName);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the value for the given column.
|
|
||||||
*
|
|
||||||
* @param columnName The column name as it appears in the file header.
|
|
||||||
* @param value The value
|
|
||||||
* @return The existing value associated with the columnName, if there is one.
|
|
||||||
*/
|
|
||||||
protected String putColumnValue(final String columnName, final String value) {
|
|
||||||
return columnValues.put(columnName, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @return all values in this line, hashed by their column names.
|
|
||||||
*/
|
|
||||||
public Map<String,String> getColumnValues() {
|
|
||||||
return Collections.unmodifiableMap(columnValues);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public String getChr() {
|
|
||||||
return chr;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getStart() {
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getEnd() {
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setChr(String chr) {
|
|
||||||
this.chr = chr;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setStart(int start) {
|
|
||||||
this.start = start;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setEnd(int end) {
|
|
||||||
this.end = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
if ( strRep == null ) {
|
|
||||||
StringBuilder sb = new StringBuilder();
|
|
||||||
|
|
||||||
for(String columnName : columnNames ) {
|
|
||||||
if ( sb.length() == 0 )
|
|
||||||
sb.append("[");
|
|
||||||
else
|
|
||||||
sb.append(", ");
|
|
||||||
sb.append(columnName + "=" + columnValues.get(columnName));
|
|
||||||
}
|
|
||||||
sb.append("]");
|
|
||||||
|
|
||||||
strRep = sb.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
return strRep;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||||
import org.broadinstitute.sting.commandline.Input;
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
|
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
|
||||||
|
|
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
|
||||||
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
||||||
|
|
||||||
// find the types available to the track builders
|
// find the types available to the track builders
|
||||||
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
|
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
|
||||||
|
|
||||||
// check that the type is valid
|
// check that the type is valid
|
||||||
if (!typeMapping.containsKey(inputFileType))
|
if (descriptor == null)
|
||||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
|
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
|
||||||
|
|
||||||
// create the codec
|
// create the codec
|
||||||
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
|
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
|
||||||
|
|
||||||
// check if it's a reference dependent feature codec
|
// check if it's a reference dependent feature codec
|
||||||
if (codec instanceof ReferenceDependentFeatureCodec)
|
if (codec instanceof ReferenceDependentFeatureCodec)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,255 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
|
import com.google.java.contract.Ensures;
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
|
import org.broad.tribble.NameAwareCodec;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||||
|
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||||
|
|
||||||
|
import javax.mail.Header;
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class for managing Tribble Feature readers available to the GATK. The features
|
||||||
|
* are dynamically determined via a PluginManager. This class provides convenient
|
||||||
|
* getter methods for obtaining FeatureDescriptor objects that collect all of the
|
||||||
|
* useful information about the Tribble Codec, Feature, and name in one place.
|
||||||
|
*
|
||||||
|
* @author depristo
|
||||||
|
*/
|
||||||
|
public class FeatureManager {
|
||||||
|
public static class FeatureDescriptor implements Comparable<FeatureDescriptor> {
|
||||||
|
final String name;
|
||||||
|
final FeatureCodec codec;
|
||||||
|
|
||||||
|
public FeatureDescriptor(final String name, final FeatureCodec codec) {
|
||||||
|
this.name = name;
|
||||||
|
this.codec = codec;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() {
|
||||||
|
return name;
|
||||||
|
}
|
||||||
|
public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
|
||||||
|
public FeatureCodec getCodec() {
|
||||||
|
return codec;
|
||||||
|
}
|
||||||
|
public Class getCodecClass() { return codec.getClass(); }
|
||||||
|
public Class getFeatureClass() { return codec.getFeatureType(); }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
|
||||||
|
getName(), getCodecClass().getName(), getFeatureClass().getName());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int compareTo(FeatureDescriptor o) {
|
||||||
|
return getName().compareTo(o.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final PluginManager<FeatureCodec> pluginManager;
|
||||||
|
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Construct a FeatureManager
|
||||||
|
*/
|
||||||
|
public FeatureManager() {
|
||||||
|
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
|
||||||
|
|
||||||
|
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
|
||||||
|
FeatureCodec codec = pluginManager.createByName(rawName);
|
||||||
|
String name = rawName.toUpperCase();
|
||||||
|
FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
|
||||||
|
featureDescriptors.add(featureDescriptor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
|
||||||
|
*
|
||||||
|
* @param codecClass
|
||||||
|
* @return A FeatureDescriptor or null if none is found
|
||||||
|
*/
|
||||||
|
@Requires("codecClass != null")
|
||||||
|
public FeatureDescriptor getByCodec(Class codecClass) {
|
||||||
|
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||||
|
if ( descriptor.getCodecClass().equals(codecClass) )
|
||||||
|
return descriptor;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a collection of FeatureDescriptors that emit records of type featureClass
|
||||||
|
*
|
||||||
|
* @param featureClass
|
||||||
|
* @return A collection of the matching FeatureDescriptors; empty (never null) if none is found
|
||||||
|
*/
|
||||||
|
@Requires("featureClass != null")
|
||||||
|
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
|
||||||
|
Set<FeatureDescriptor> consistentDescriptors = new TreeSet<FeatureDescriptor>();
|
||||||
|
|
||||||
|
if (featureClass == null)
|
||||||
|
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
||||||
|
|
||||||
|
for ( FeatureDescriptor descriptor : featureDescriptors ) {
|
||||||
|
if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
|
||||||
|
consistentDescriptors.add(descriptor);
|
||||||
|
}
|
||||||
|
return consistentDescriptors;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the FeatureDescriptor with getName().equals(name)
|
||||||
|
*
|
||||||
|
* @param name
|
||||||
|
* @return A FeatureDescriptor or null if none is found
|
||||||
|
*/
|
||||||
|
@Requires("name != null")
|
||||||
|
public FeatureDescriptor getByName(String name) {
|
||||||
|
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||||
|
if ( descriptor.getName().equalsIgnoreCase(name) )
|
||||||
|
return descriptor;
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the FeatureDescriptor that can read the contents of File file, if one can be determined
|
||||||
|
*
|
||||||
|
* @param file
|
||||||
|
* @return A FeatureDescriptor or null if none is found
|
||||||
|
*/
|
||||||
|
@Requires({"file != null", "file.isFile()", "file.canRead()"})
|
||||||
|
public FeatureDescriptor getByFiletype(File file) {
|
||||||
|
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
|
||||||
|
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||||
|
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
|
||||||
|
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
|
||||||
|
canParse.add(descriptor);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( canParse.size() == 0 )
|
||||||
|
return null;
|
||||||
|
else if ( canParse.size() > 1 )
|
||||||
|
throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
|
||||||
|
else
|
||||||
|
return canParse.get(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
|
||||||
|
* @param triplet
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Requires("triplet != null")
|
||||||
|
public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
|
||||||
|
return getByName(triplet.getType());
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return all of the FeatureDescriptors available to the GATK. Never null
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public Collection<FeatureDescriptor> getFeatureDescriptors() {
|
||||||
|
return Collections.unmodifiableCollection(featureDescriptors);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public String userFriendlyListOfAvailableFeatures() {
|
||||||
|
return userFriendlyListOfAvailableFeatures(Feature.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
|
||||||
|
* restricted to only Codecs producing Features consistent with the requiredFeatureType
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
|
||||||
|
final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation";
|
||||||
|
|
||||||
|
int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length();
|
||||||
|
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
|
||||||
|
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
|
||||||
|
maxNameLen = Math.max(maxNameLen, descriptor.getName().length());
|
||||||
|
maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder docs = new StringBuilder();
|
||||||
|
String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n";
|
||||||
|
docs.append(String.format(format, nameHeader, featureHeader, docHeader));
|
||||||
|
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
|
||||||
|
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
|
||||||
|
String oneDoc = String.format(format,
|
||||||
|
descriptor.getName(),
|
||||||
|
descriptor.getSimpleFeatureName(),
|
||||||
|
GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()));
|
||||||
|
docs.append(oneDoc);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return docs.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Create a new FeatureCodec of the type described in descriptor, assigning it the
|
||||||
|
* name (if possible) and providing it the genomeLocParser (where necessary)
|
||||||
|
*
|
||||||
|
* @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
|
||||||
|
* @param name the name to assign this codec
|
||||||
|
* @return the feature codec itself
|
||||||
|
*/
|
||||||
|
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
|
||||||
|
@Ensures("result != null")
|
||||||
|
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
|
||||||
|
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
|
||||||
|
if ( codex instanceof NameAwareCodec )
|
||||||
|
((NameAwareCodec)codex).setName(name);
|
||||||
|
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||||
|
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||||
|
return codex;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010. The Broad Institute
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
|
||||||
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @author aaron
|
|
||||||
* <p/>
|
|
||||||
* Interface QueryableTrack
|
|
||||||
* <p/>
|
|
||||||
* a decorator interface for tracks that are queryable
|
|
||||||
*/
|
|
||||||
public interface QueryableTrack {
|
|
||||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
|
|
||||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
|
||||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
|
||||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
|
||||||
public void close();
|
|
||||||
}
|
|
||||||
|
|
@ -25,8 +25,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
import net.sf.samtools.util.CloseableIterator;
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureSource;
|
import org.broad.tribble.FeatureSource;
|
||||||
|
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||||
|
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
@ -45,10 +49,10 @@ import java.io.IOException;
|
||||||
* the basics of what a reference metadata track must contain.
|
* the basics of what a reference metadata track must contain.
|
||||||
*/
|
*/
|
||||||
public class RMDTrack {
|
public class RMDTrack {
|
||||||
|
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||||
|
|
||||||
// the basics of a track:
|
// the basics of a track:
|
||||||
private final Class type; // our type
|
private final Class type; // our type
|
||||||
private final Class recordType; // the underlying records that are produced by this track
|
|
||||||
private final String name; // the name
|
private final String name; // the name
|
||||||
private final File file; // the associated file we create the reader from
|
private final File file; // the associated file we create the reader from
|
||||||
|
|
||||||
|
|
@ -90,7 +94,6 @@ public class RMDTrack {
|
||||||
*/
|
*/
|
||||||
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
|
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
|
||||||
this.type = type;
|
this.type = type;
|
||||||
this.recordType = codec.getFeatureType();
|
|
||||||
this.name = name;
|
this.name = name;
|
||||||
this.file = file;
|
this.file = file;
|
||||||
this.reader = reader;
|
this.reader = reader;
|
||||||
|
|
@ -112,19 +115,10 @@ public class RMDTrack {
|
||||||
}
|
}
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
CloseableTribbleIterator<Feature> iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop());
|
||||||
}
|
if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||||
|
logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog());
|
||||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName());
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
|
||||||
}
|
|
||||||
|
|
||||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
|
||||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void close() {
|
public void close() {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2010 The Broad Institute
|
* Copyright (c) 2011, The Broad Institute
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person
|
* Permission is hereby granted, free of charge, to any person
|
||||||
* obtaining a copy of this software and associated documentation
|
* obtaining a copy of this software and associated documentation
|
||||||
|
|
@ -12,37 +12,36 @@
|
||||||
*
|
*
|
||||||
* The above copyright notice and this permission notice shall be
|
* The above copyright notice and this permission notice shall be
|
||||||
* included in all copies or substantial portions of the Software.
|
* included in all copies or substantial portions of the Software.
|
||||||
*
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||||
|
|
||||||
import net.sf.samtools.SAMSequenceDictionary;
|
import net.sf.samtools.SAMSequenceDictionary;
|
||||||
import net.sf.samtools.SAMSequenceRecord;
|
import net.sf.samtools.SAMSequenceRecord;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.*;
|
import org.broad.tribble.FeatureCodec;
|
||||||
|
import org.broad.tribble.FeatureSource;
|
||||||
|
import org.broad.tribble.Tribble;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.index.Index;
|
import org.broad.tribble.index.Index;
|
||||||
import org.broad.tribble.index.IndexFactory;
|
import org.broad.tribble.index.IndexFactory;
|
||||||
import org.broad.tribble.source.BasicFeatureSource;
|
import org.broad.tribble.source.BasicFeatureSource;
|
||||||
|
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
|
||||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||||
import org.broadinstitute.sting.commandline.Tags;
|
import org.broadinstitute.sting.commandline.Tags;
|
||||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -53,7 +52,10 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.LinkedHashSet;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.TreeSet;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -67,17 +69,16 @@ import java.util.*;
|
||||||
* that gets iterators from the FeatureReader using Tribble.
|
* that gets iterators from the FeatureReader using Tribble.
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||||
/**
|
/**
|
||||||
* our log, which we use to capture anything from this class
|
* our log, which we use to capture anything from this class
|
||||||
*/
|
*/
|
||||||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||||
|
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
||||||
|
|
||||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||||
|
|
||||||
private Map<String, Class> classes = null;
|
|
||||||
|
|
||||||
// private sequence dictionary we use to set our tracks with
|
// private sequence dictionary we use to set our tracks with
|
||||||
private SAMSequenceDictionary dict = null;
|
private SAMSequenceDictionary dict = null;
|
||||||
|
|
||||||
|
|
@ -91,6 +92,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
*/
|
*/
|
||||||
private ValidationExclusion.TYPE validationExclusionType;
|
private ValidationExclusion.TYPE validationExclusionType;
|
||||||
|
|
||||||
|
FeatureManager featureManager;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
|
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
|
||||||
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
|
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
|
||||||
|
|
@ -102,29 +105,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
public RMDTrackBuilder(SAMSequenceDictionary dict,
|
public RMDTrackBuilder(SAMSequenceDictionary dict,
|
||||||
GenomeLocParser genomeLocParser,
|
GenomeLocParser genomeLocParser,
|
||||||
ValidationExclusion.TYPE validationExclusionType) {
|
ValidationExclusion.TYPE validationExclusionType) {
|
||||||
super(FeatureCodec.class, "Codecs", "Codec");
|
|
||||||
this.dict = dict;
|
this.dict = dict;
|
||||||
this.genomeLocParser = genomeLocParser;
|
|
||||||
this.validationExclusionType = validationExclusionType;
|
this.validationExclusionType = validationExclusionType;
|
||||||
|
this.genomeLocParser = genomeLocParser;
|
||||||
classes = new HashMap<String, Class>();
|
featureManager = new FeatureManager();
|
||||||
for (String name: this.getPluginsByName().keySet()) {
|
|
||||||
classes.put(name.toUpperCase(), getPluginsByName().get(name));
|
|
||||||
} }
|
|
||||||
|
|
||||||
/** @return a list of all available track types we currently have access to create */
|
|
||||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
|
||||||
return Collections.unmodifiableMap(classes);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** @return a list of all available track record types we currently have access to create */
|
public FeatureManager getFeatureManager() {
|
||||||
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
|
return featureManager;
|
||||||
HashMap classToRecord = new HashMap<String, Class>();
|
|
||||||
for (String name: this.getPluginsByName().keySet()) {
|
|
||||||
FeatureCodec codec = this.createByName(name);
|
|
||||||
classToRecord.put(name, codec.getFeatureType());
|
|
||||||
}
|
|
||||||
return classToRecord;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -133,45 +121,38 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
* @param fileDescriptor a description of the type of track to build.
|
* @param fileDescriptor a description of the type of track to build.
|
||||||
*
|
*
|
||||||
* @return an instance of the track
|
* @return an instance of the track
|
||||||
* @throws RMDTrackCreationException
|
|
||||||
* if we don't know of the target class or we couldn't create it
|
|
||||||
*/
|
*/
|
||||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException {
|
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
|
||||||
String name = fileDescriptor.getName();
|
String name = fileDescriptor.getName();
|
||||||
File inputFile = new File(fileDescriptor.getFile());
|
File inputFile = new File(fileDescriptor.getFile());
|
||||||
|
|
||||||
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
|
||||||
if (featureCodecClass == null)
|
if (descriptor == null)
|
||||||
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
|
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
|
||||||
|
|
||||||
// return a feature reader track
|
// return a feature reader track
|
||||||
Pair<FeatureSource, SAMSequenceDictionary> pair;
|
Pair<FeatureSource, SAMSequenceDictionary> pair;
|
||||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||||
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
|
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
|
||||||
else
|
else
|
||||||
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
|
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
|
||||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||||
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
|
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
|
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
|
||||||
* @param targetClass Type of Tribble class to build.
|
* @param codecClass Type of Tribble codec class to build.
|
||||||
* @param inputFile Input file type to use.
|
* @param inputFile Input file type to use.
|
||||||
* @return An RMDTrack, suitable for accessing reference metadata.
|
* @return An RMDTrack, suitable for accessing reference metadata.
|
||||||
*/
|
*/
|
||||||
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
|
public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
|
||||||
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
|
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||||
String typeName = null;
|
|
||||||
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
|
|
||||||
if(trackType.getValue().equals(targetClass))
|
|
||||||
typeName = trackType.getKey();
|
|
||||||
}
|
|
||||||
|
|
||||||
if(typeName == null)
|
if (descriptor == null)
|
||||||
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
|
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
||||||
|
|
||||||
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -179,16 +160,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
* reader of the appropriate type will figure out what the right index type is, and determine if it
|
* reader of the appropriate type will figure out what the right index type is, and determine if it
|
||||||
* exists.
|
* exists.
|
||||||
*
|
*
|
||||||
* @param targetClass the codec class type
|
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||||
* @param name the name of the track
|
* @param name the name of the track
|
||||||
* @param inputFile the file to load
|
* @param inputFile the file to load
|
||||||
* @return a feature reader implementation
|
* @return a feature reader implementation
|
||||||
*/
|
*/
|
||||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
|
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
|
||||||
// we might not know the index type, try loading with the default reader constructor
|
// we might not know the index type, try loading with the default reader constructor
|
||||||
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
||||||
try {
|
try {
|
||||||
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
|
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
|
||||||
} catch (TribbleException e) {
|
} catch (TribbleException e) {
|
||||||
throw new UserException(e.getMessage(), e);
|
throw new UserException(e.getMessage(), e);
|
||||||
}
|
}
|
||||||
|
|
@ -196,28 +177,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* add a name to the codec, if it takes one
|
* add a name to the codec, if it takes one
|
||||||
* @param targetClass the class to create a codec for
|
* @param descriptor the class to create a codec for
|
||||||
* @param name the name to assign this codec
|
* @param name the name to assign this codec
|
||||||
* @return the feature codec itself
|
* @return the feature codec itself
|
||||||
*/
|
*/
|
||||||
public FeatureCodec createCodec(Class targetClass, String name) {
|
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
|
||||||
FeatureCodec codex = this.createByType(targetClass);
|
return featureManager.createCodec(descriptor, name, genomeLocParser);
|
||||||
if ( codex instanceof NameAwareCodec )
|
|
||||||
((NameAwareCodec)codex).setName(name);
|
|
||||||
if(codex instanceof ReferenceDependentFeatureCodec)
|
|
||||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
|
||||||
return codex;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* create a feature source object given:
|
* create a feature source object given:
|
||||||
* @param targetClass the target class
|
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||||
* @param name the name of the codec
|
* @param name the name of the codec
|
||||||
* @param inputFile the tribble file to parse
|
* @param inputFile the tribble file to parse
|
||||||
* @param storageType How the RMD is streamed into the input file.
|
* @param storageType How the RMD is streamed into the input file.
|
||||||
* @return the input file as a FeatureReader
|
* @return the input file as a FeatureReader
|
||||||
*/
|
*/
|
||||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
|
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
|
||||||
|
String name,
|
||||||
|
File inputFile,
|
||||||
|
RMDStorageType storageType) {
|
||||||
// Feature source and sequence dictionary to use as the ultimate reference
|
// Feature source and sequence dictionary to use as the ultimate reference
|
||||||
FeatureSource featureSource = null;
|
FeatureSource featureSource = null;
|
||||||
SAMSequenceDictionary sequenceDictionary = null;
|
SAMSequenceDictionary sequenceDictionary = null;
|
||||||
|
|
@ -227,7 +206,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
|
|
||||||
if(canBeIndexed) {
|
if(canBeIndexed) {
|
||||||
try {
|
try {
|
||||||
Index index = loadIndex(inputFile, createCodec(targetClass, name));
|
Index index = loadIndex(inputFile, createCodec(descriptor, name));
|
||||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||||
catch (ReviewedStingException e) { }
|
catch (ReviewedStingException e) { }
|
||||||
|
|
||||||
|
|
@ -240,7 +219,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||||
}
|
}
|
||||||
|
|
||||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
|
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||||
|
featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
|
||||||
|
else
|
||||||
|
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
|
||||||
}
|
}
|
||||||
catch (TribbleException e) {
|
catch (TribbleException e) {
|
||||||
throw new UserException(e.getMessage());
|
throw new UserException(e.getMessage());
|
||||||
|
|
@ -250,7 +232,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false);
|
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
||||||
|
|
@ -385,22 +367,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||||
return idx;
|
return idx;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns a collection of track names that match the record type.
|
|
||||||
* @param trackRecordType the record type specified in the @RMD annotation
|
|
||||||
* @return a collection of available track record type names that match the record type
|
|
||||||
*/
|
|
||||||
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
|
|
||||||
Set<String> names = new TreeSet<String>();
|
|
||||||
if (trackRecordType == null)
|
|
||||||
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
|
||||||
|
|
||||||
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
|
|
||||||
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
|
|
||||||
names.add(availableTrackRecordType.getKey());
|
|
||||||
}
|
|
||||||
return names;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------------------------------------
|
||||||
// static functions to work with the sequence dictionaries of indexes
|
// static functions to work with the sequence dictionaries of indexes
|
||||||
|
|
@ -1,45 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010. The Broad Institute
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
|
||||||
|
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author aaron
|
|
||||||
*
|
|
||||||
* Class RMDTrackCreationException
|
|
||||||
*
|
|
||||||
* if we fail for some reason to make a track, throw this exception
|
|
||||||
*/
|
|
||||||
public class RMDTrackCreationException extends ReviewedStingException {
|
|
||||||
public RMDTrackCreationException(String msg) {
|
|
||||||
super(msg);
|
|
||||||
}
|
|
||||||
|
|
||||||
public RMDTrackCreationException(String message, Throwable throwable) {
|
|
||||||
super(message, throwable);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
||||||
|
|
||||||
public abstract GenomeLoc getLocation();
|
public abstract GenomeLoc getLocation();
|
||||||
|
|
||||||
|
// TODO: this should be a Feature
|
||||||
public abstract Object getUnderlyingObject();
|
public abstract Object getUnderlyingObject();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
||||||
return feature.getEnd();
|
return feature.getEnd();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: this should be a Feature, actually
|
||||||
public Object getUnderlyingObject() {
|
public Object getUnderlyingObject() {
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* wrapping a old style rod into the new GATK feature style
|
|
||||||
*/
|
|
||||||
public static class RODGATKFeature extends GATKFeature {
|
|
||||||
|
|
||||||
// our data
|
|
||||||
private ReferenceOrderedDatum datum;
|
|
||||||
|
|
||||||
public RODGATKFeature(ReferenceOrderedDatum datum) {
|
|
||||||
super(datum.getName());
|
|
||||||
this.datum = datum;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public GenomeLoc getLocation() {
|
|
||||||
return datum.getLocation();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Object getUnderlyingObject() {
|
|
||||||
return datum;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getChr() {
|
|
||||||
return datum.getLocation().getContig();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getStart() {
|
|
||||||
return (int)datum.getLocation().getStart();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getEnd() {
|
|
||||||
return (int)datum.getLocation().getStop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,65 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2010. The Broad Institute
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
|
||||||
|
|
||||||
import net.sf.samtools.util.CloseableIterator;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
|
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @author aaron
|
|
||||||
*
|
|
||||||
* Class GATKFeatureIterator
|
|
||||||
*
|
|
||||||
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
|
|
||||||
*/
|
|
||||||
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
|
||||||
private final Iterator<ReferenceOrderedDatum> iter;
|
|
||||||
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
|
|
||||||
this.iter = iter;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean hasNext() {
|
|
||||||
return iter.hasNext();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public GATKFeature next() {
|
|
||||||
return new GATKFeature.RODGATKFeature(iter.next());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void remove() {
|
|
||||||
throw new UnsupportedOperationException("Remove not supported");
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() {
|
|
||||||
// do nothing, our underlying iterator doesn't support this
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,190 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
|
|
||||||
|
|
||||||
import net.sf.samtools.util.SequenceUtil;
|
|
||||||
import org.broad.tribble.annotation.Strand;
|
|
||||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* this class contains static helper methods for DbSNP
|
|
||||||
*/
|
|
||||||
public class DbSNPHelper {
|
|
||||||
public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";
|
|
||||||
|
|
||||||
private DbSNPHelper() {} // don't make a DbSNPHelper
|
|
||||||
|
|
||||||
public static DbSNPFeature getFirstRealSNP(List<Object> dbsnpList) {
|
|
||||||
if (dbsnpList == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
DbSNPFeature dbsnp = null;
|
|
||||||
for (Object d : dbsnpList) {
|
|
||||||
if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) {
|
|
||||||
dbsnp = (DbSNPFeature) d;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return dbsnp;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String rsIDOfFirstRealSNP(List<Object> featureList) {
|
|
||||||
if (featureList == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
String rsID = null;
|
|
||||||
for ( Object d : featureList ) {
|
|
||||||
if ( d instanceof DbSNPFeature ) {
|
|
||||||
if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) {
|
|
||||||
rsID = ((DbSNPFeature)d).getRsID();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if ( d instanceof VariantContext) {
|
|
||||||
if ( ((VariantContext)d).isSNP() ) {
|
|
||||||
rsID = ((VariantContext)d).getID();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rsID;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String rsIDOfFirstRealIndel(List<Object> featureList) {
|
|
||||||
if (featureList == null)
|
|
||||||
return null;
|
|
||||||
|
|
||||||
String rsID = null;
|
|
||||||
for ( Object d : featureList ) {
|
|
||||||
if ( d instanceof DbSNPFeature ) {
|
|
||||||
if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) {
|
|
||||||
rsID = ((DbSNPFeature)d).getRsID();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else if ( d instanceof VariantContext) {
|
|
||||||
if ( ((VariantContext)d).isIndel() ) {
|
|
||||||
rsID = ((VariantContext)d).getID();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return rsID;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* get the -1 * (log 10 of the error value)
|
|
||||||
*
|
|
||||||
* @return the log based error estimate
|
|
||||||
*/
|
|
||||||
public static double getNegLog10PError(DbSNPFeature feature) {
|
|
||||||
return 4; // -log10(0.0001)
|
|
||||||
}
|
|
||||||
|
|
||||||
//
|
|
||||||
// What kind of variant are we?
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public static boolean isSNP(DbSNPFeature feature) {
|
|
||||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isMNP(DbSNPFeature feature) {
|
|
||||||
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String toMediumString(DbSNPFeature feature) {
|
|
||||||
String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved()));
|
|
||||||
if (isSNP(feature)) s += ":SNP";
|
|
||||||
if (isIndel(feature)) s += ":Indel";
|
|
||||||
if (isHapmap(feature)) s += ":Hapmap";
|
|
||||||
if (is2Hit2Allele(feature)) s += ":2Hit";
|
|
||||||
return s;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isInsertion(DbSNPFeature feature) {
|
|
||||||
return feature.getVariantType().contains("insertion");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isDeletion(DbSNPFeature feature) {
|
|
||||||
return feature.getVariantType().contains("deletion");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isIndel(DbSNPFeature feature) {
|
|
||||||
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isComplexIndel(DbSNPFeature feature) {
|
|
||||||
return feature.getVariantType().contains("in-del");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isHapmap(DbSNPFeature feature) {
|
|
||||||
return feature.getValidationStatus().contains("by-hapmap");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean is2Hit2Allele(DbSNPFeature feature) {
|
|
||||||
return feature.getValidationStatus().contains("by-2hit-2allele");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean is1000genomes(DbSNPFeature feature) {
|
|
||||||
return feature.getValidationStatus().contains("by-1000genomes");
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isMQ1(DbSNPFeature feature) {
|
|
||||||
return feature.getWeight() == 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
|
||||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
|
||||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
|
||||||
* frequency).
|
|
||||||
*
|
|
||||||
* @return an alternate allele list
|
|
||||||
*/
|
|
||||||
public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
|
|
||||||
List<String> ret = new ArrayList<String>();
|
|
||||||
for (String allele : getAlleleList(feature))
|
|
||||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean onFwdStrand(DbSNPFeature feature) {
|
|
||||||
return feature.getStrand() == Strand.POSITIVE;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String getReference(DbSNPFeature feature) {
|
|
||||||
return feature.getNCBIRefBase();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static String toSimpleString(DbSNPFeature feature) {
|
|
||||||
return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* gets the alleles. This method should return all the alleles present at the location,
|
|
||||||
* including the reference base. The first allele should always be the reference allele, followed
|
|
||||||
* by an unordered list of alternate alleles.
|
|
||||||
*
|
|
||||||
* @return an alternate allele list
|
|
||||||
*/
|
|
||||||
public static List<String> getAlleleList(DbSNPFeature feature) {
|
|
||||||
List<String> alleleList = new ArrayList<String>();
|
|
||||||
// add ref first
|
|
||||||
if ( onFwdStrand(feature) )
|
|
||||||
alleleList = Arrays.asList(feature.getObserved());
|
|
||||||
else
|
|
||||||
for (String str : feature.getObserved())
|
|
||||||
alleleList.add(SequenceUtil.reverseComplement(str));
|
|
||||||
if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) )
|
|
||||||
Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0);
|
|
||||||
|
|
||||||
return alleleList;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,21 +1,25 @@
|
||||||
package org.broadinstitute.sting.gatk.report;
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||||
|
|
||||||
import java.io.*;
|
import java.io.*;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.List;
|
||||||
import java.util.TreeMap;
|
import java.util.TreeMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Container class for GATK report tables
|
* Container class for GATK report tables
|
||||||
*/
|
*/
|
||||||
public class GATKReport {
|
public class GATKReport {
|
||||||
private TreeMap<String, GATKReportTable> tables;
|
public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
|
||||||
|
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a new, empty GATKReport.
|
* Create a new, empty GATKReport.
|
||||||
*/
|
*/
|
||||||
public GATKReport() {
|
public GATKReport() {
|
||||||
tables = new TreeMap<String, GATKReportTable>();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -23,7 +27,7 @@ public class GATKReport {
|
||||||
* @param filename the path to the file to load
|
* @param filename the path to the file to load
|
||||||
*/
|
*/
|
||||||
public GATKReport(String filename) {
|
public GATKReport(String filename) {
|
||||||
loadReport(new File(filename));
|
this(new File(filename));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -31,7 +35,6 @@ public class GATKReport {
|
||||||
* @param file the file to load
|
* @param file the file to load
|
||||||
*/
|
*/
|
||||||
public GATKReport(File file) {
|
public GATKReport(File file) {
|
||||||
tables = new TreeMap<String, GATKReportTable>();
|
|
||||||
loadReport(file);
|
loadReport(file);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -46,11 +49,17 @@ public class GATKReport {
|
||||||
GATKReportTable table = null;
|
GATKReportTable table = null;
|
||||||
String[] header = null;
|
String[] header = null;
|
||||||
int id = 0;
|
int id = 0;
|
||||||
|
GATKReportVersion version = null;
|
||||||
|
List<Integer> columnStarts = null;
|
||||||
|
|
||||||
String line;
|
String line;
|
||||||
while ( (line = reader.readLine()) != null ) {
|
while ( (line = reader.readLine()) != null ) {
|
||||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
|
||||||
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
|
if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
|
||||||
|
|
||||||
|
version = GATKReportVersion.fromHeader(line);
|
||||||
|
|
||||||
|
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
|
||||||
String[] pieces = line.split(" : ");
|
String[] pieces = line.split(" : ");
|
||||||
|
|
||||||
String tableName = pieces[0];
|
String tableName = pieces[0];
|
||||||
|
|
@ -58,14 +67,35 @@ public class GATKReport {
|
||||||
|
|
||||||
addTable(tableName, tableDesc);
|
addTable(tableName, tableDesc);
|
||||||
table = getTable(tableName);
|
table = getTable(tableName);
|
||||||
|
table.setVersion(version);
|
||||||
|
|
||||||
header = null;
|
header = null;
|
||||||
} else if ( line.isEmpty() ) {
|
columnStarts = null;
|
||||||
|
} else if ( line.trim().isEmpty() ) {
|
||||||
// do nothing
|
// do nothing
|
||||||
} else {
|
} else {
|
||||||
if (table != null) {
|
if (table != null) {
|
||||||
|
|
||||||
|
String[] splitLine;
|
||||||
|
|
||||||
|
switch (version) {
|
||||||
|
case V0_1:
|
||||||
|
splitLine = TextFormattingUtils.splitWhiteSpace(line);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case V0_2:
|
||||||
|
if (header == null) {
|
||||||
|
columnStarts = TextFormattingUtils.getWordStarts(line);
|
||||||
|
}
|
||||||
|
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
|
||||||
|
}
|
||||||
|
|
||||||
if (header == null) {
|
if (header == null) {
|
||||||
header = line.split("\\s+");
|
header = splitLine;
|
||||||
|
|
||||||
table.addPrimaryKey("id", false);
|
table.addPrimaryKey("id", false);
|
||||||
|
|
||||||
|
|
@ -75,10 +105,8 @@ public class GATKReport {
|
||||||
|
|
||||||
id = 0;
|
id = 0;
|
||||||
} else {
|
} else {
|
||||||
String[] entries = line.split("\\s+");
|
|
||||||
|
|
||||||
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
|
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
|
||||||
table.set(id, header[columnIndex], entries[columnIndex]);
|
table.set(id, header[columnIndex], splitLine[columnIndex]);
|
||||||
}
|
}
|
||||||
|
|
||||||
id++;
|
id++;
|
||||||
|
|
@ -125,7 +153,10 @@ public class GATKReport {
|
||||||
* @return the table object
|
* @return the table object
|
||||||
*/
|
*/
|
||||||
public GATKReportTable getTable(String tableName) {
|
public GATKReportTable getTable(String tableName) {
|
||||||
return tables.get(tableName);
|
GATKReportTable table = tables.get(tableName);
|
||||||
|
if (table == null)
|
||||||
|
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
|
||||||
|
return table;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -140,4 +171,8 @@ public class GATKReport {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public Collection<GATKReportTable> getTables() {
|
||||||
|
return tables.values();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
||||||
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
|
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
|
||||||
* values) in the table.
|
* values) in the table.
|
||||||
*
|
*
|
||||||
* @param primaryKey the primary key position in the column that should be set
|
* @param primaryKey the primary key position in the column that should be retrieved
|
||||||
* @return the value at the specified position in the column, or the default value if the element is not set
|
* @return the value at the specified position in the column, or the default value if the element is not set
|
||||||
*/
|
*/
|
||||||
public Object getWithoutSideEffects(Object primaryKey) {
|
private Object getWithoutSideEffects(Object primaryKey) {
|
||||||
if (!this.containsKey(primaryKey)) {
|
if (!this.containsKey(primaryKey)) {
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
||||||
return this.get(primaryKey);
|
return this.get(primaryKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the string form of an object from the column, but if it doesn't exist, return the string form of the default value.
|
||||||
|
*
|
||||||
|
* @param primaryKey the primary key position in the column that should be retrieved
|
||||||
|
* @return the string value at the specified position in the column, or the default value if the element is not set
|
||||||
|
*/
|
||||||
|
public String getStringValue(Object primaryKey) {
|
||||||
|
return toString(getWithoutSideEffects(primaryKey));
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the displayable property of the column. If true, the column will be displayed in the final output.
|
* Return the displayable property of the column. If true, the column will be displayed in the final output.
|
||||||
* If not, printing will be suppressed for the contents of the table.
|
* If not, printing will be suppressed for the contents of the table.
|
||||||
|
|
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
||||||
|
|
||||||
for (Object obj : this.values()) {
|
for (Object obj : this.values()) {
|
||||||
if (obj != null) {
|
if (obj != null) {
|
||||||
int width = obj.toString().length();
|
int width = toString(obj).length();
|
||||||
|
|
||||||
if (width > maxWidth) {
|
if (width > maxWidth) {
|
||||||
maxWidth = width;
|
maxWidth = width;
|
||||||
|
|
@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
||||||
|
|
||||||
return maxWidth;
|
return maxWidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a string version of the values.
|
||||||
|
* @param obj The object to convert to a string
|
||||||
|
* @return The string representation of the column
|
||||||
|
*/
|
||||||
|
private static String toString(Object obj) {
|
||||||
|
String value;
|
||||||
|
if (obj == null) {
|
||||||
|
value = "null";
|
||||||
|
} else if (obj instanceof Float) {
|
||||||
|
value = String.format("%.8f", (Float) obj);
|
||||||
|
} else if (obj instanceof Double) {
|
||||||
|
value = String.format("%.8f", (Double) obj);
|
||||||
|
} else {
|
||||||
|
value = obj.toString();
|
||||||
|
}
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getColumnName() {
|
||||||
|
return columnName;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tracks a linked list of GATKReportColumn in order by name.
|
||||||
|
*/
|
||||||
|
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||||
|
private List<String> columnNames = new ArrayList<String>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the column by index
|
||||||
|
* @param i the index
|
||||||
|
* @return The column
|
||||||
|
*/
|
||||||
|
public GATKReportColumn getByIndex(int i) {
|
||||||
|
return get(columnNames.get(i));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GATKReportColumn remove(Object key) {
|
||||||
|
columnNames.remove(key);
|
||||||
|
return super.remove(key);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public GATKReportColumn put(String key, GATKReportColumn value) {
|
||||||
|
columnNames.add(key);
|
||||||
|
return super.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,83 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2011, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.report;
|
|
||||||
|
|
||||||
import org.apache.commons.io.FileUtils;
|
|
||||||
import org.apache.commons.io.IOUtils;
|
|
||||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.InputStream;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
public class GATKReportParser {
|
|
||||||
private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();
|
|
||||||
|
|
||||||
public void parse(File file) throws IOException {
|
|
||||||
InputStream stream = FileUtils.openInputStream(file);
|
|
||||||
try {
|
|
||||||
parse(stream);
|
|
||||||
} finally {
|
|
||||||
IOUtils.closeQuietly(stream);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public void parse(InputStream input) throws IOException {
|
|
||||||
GATKReportTableParser table = null;
|
|
||||||
|
|
||||||
for (String line: new XReadLines(input)) {
|
|
||||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
|
||||||
table = newTableParser(line);
|
|
||||||
tables.add(table);
|
|
||||||
table.parse(line);
|
|
||||||
} else if (table != null) {
|
|
||||||
if (line.trim().length() == 0)
|
|
||||||
table = null;
|
|
||||||
else
|
|
||||||
table.parse(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getValue(String tableName, String[] key, String column) {
|
|
||||||
for (GATKReportTableParser table: tables)
|
|
||||||
if (table.getTableName().equals(tableName))
|
|
||||||
return table.getValue(key, column);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getValue(String tableName, String key, String column) {
|
|
||||||
for (GATKReportTableParser table: tables)
|
|
||||||
if (table.getTableName().equals(tableName))
|
|
||||||
return table.getValue(key, column);
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
private GATKReportTableParser newTableParser(String header) {
|
|
||||||
return new GATKReportTableParser();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.report;
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import org.apache.commons.lang.ObjectUtils;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
@ -88,17 +89,22 @@ import java.util.regex.Pattern;
|
||||||
* but at least the prototype contained herein works.
|
* but at least the prototype contained herein works.
|
||||||
*
|
*
|
||||||
* @author Kiran Garimella
|
* @author Kiran Garimella
|
||||||
|
* @author Khalid Shakir
|
||||||
*/
|
*/
|
||||||
public class GATKReportTable {
|
public class GATKReportTable {
|
||||||
|
/** REGEX that matches any table with an invalid name */
|
||||||
|
public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
|
||||||
|
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
|
||||||
private String tableName;
|
private String tableName;
|
||||||
private String tableDescription;
|
private String tableDescription;
|
||||||
|
private GATKReportVersion version = LATEST_REPORT_VERSION;
|
||||||
|
|
||||||
private String primaryKeyName;
|
private String primaryKeyName;
|
||||||
private Collection<Object> primaryKeyColumn;
|
private Collection<Object> primaryKeyColumn;
|
||||||
private boolean primaryKeyDisplay;
|
private boolean primaryKeyDisplay;
|
||||||
boolean sortByPrimaryKey = true;
|
private boolean sortByPrimaryKey = true;
|
||||||
|
|
||||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
private GATKReportColumns columns;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||||
|
|
@ -107,12 +113,25 @@ public class GATKReportTable {
|
||||||
* @return true if the name is valid, false if otherwise
|
* @return true if the name is valid, false if otherwise
|
||||||
*/
|
*/
|
||||||
private boolean isValidName(String name) {
|
private boolean isValidName(String name) {
|
||||||
Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");
|
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
|
||||||
Matcher m = p.matcher(name);
|
Matcher m = p.matcher(name);
|
||||||
|
|
||||||
return !m.find();
|
return !m.find();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Verifies that a table description contains no carriage returns or newlines
|
||||||
|
*
|
||||||
|
* @param description the table description to check
|
||||||
|
* @return true if the description is valid, false if otherwise
|
||||||
|
*/
|
||||||
|
private boolean isValidDescription(String description) {
|
||||||
|
Pattern p = Pattern.compile("\\r|\\n");
|
||||||
|
Matcher m = p.matcher(description);
|
||||||
|
|
||||||
|
return !m.find();
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Construct a new GATK report table with the specified name and description
|
* Construct a new GATK report table with the specified name and description
|
||||||
*
|
*
|
||||||
|
|
@ -128,11 +147,23 @@ public class GATKReportTable {
|
||||||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!isValidDescription(tableDescription)) {
|
||||||
|
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
|
||||||
|
}
|
||||||
|
|
||||||
this.tableName = tableName;
|
this.tableName = tableName;
|
||||||
this.tableDescription = tableDescription;
|
this.tableDescription = tableDescription;
|
||||||
this.sortByPrimaryKey = sortByPrimaryKey;
|
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||||
|
|
||||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
columns = new GATKReportColumns();
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKReportVersion getVersion() {
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void setVersion(GATKReportVersion version) {
|
||||||
|
this.version = version;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -161,6 +192,57 @@ public class GATKReportTable {
|
||||||
primaryKeyDisplay = display;
|
primaryKeyDisplay = display;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the first primary key matching the dotted column values.
|
||||||
|
* Ex: dbsnp.eval.called.all.novel.all
|
||||||
|
* @param dottedColumnValues Period concatenated values.
|
||||||
|
* @return The first primary key matching the column values or throws an exception.
|
||||||
|
*/
|
||||||
|
public Object getPrimaryKey(String dottedColumnValues) {
|
||||||
|
Object key = findPrimaryKey(dottedColumnValues);
|
||||||
|
if (key == null)
|
||||||
|
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
|
||||||
|
return key;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns true if there is at least one row with the dotted column values.
|
||||||
|
* Ex: dbsnp.eval.called.all.novel.all
|
||||||
|
* @param dottedColumnValues Period concatenated values.
|
||||||
|
* @return true if there is at least one row matching the columns.
|
||||||
|
*/
|
||||||
|
public boolean containsPrimaryKey(String dottedColumnValues) {
|
||||||
|
return findPrimaryKey(dottedColumnValues) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the first primary key matching the dotted column values.
|
||||||
|
* Ex: dbsnp.eval.called.all.novel.all
|
||||||
|
* @param dottedColumnValues Period concatenated values.
|
||||||
|
* @return The first primary key matching the column values or null.
|
||||||
|
*/
|
||||||
|
private Object findPrimaryKey(String dottedColumnValues) {
|
||||||
|
return findPrimaryKey(dottedColumnValues.split("\\."));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the first primary key matching the column values.
|
||||||
|
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
|
||||||
|
* @param columnValues column values.
|
||||||
|
* @return The first primary key matching the column values or null.
|
||||||
|
*/
|
||||||
|
private Object findPrimaryKey(Object[] columnValues) {
|
||||||
|
for (Object primaryKey : primaryKeyColumn) {
|
||||||
|
boolean matching = true;
|
||||||
|
for (int i = 0; matching && i < columnValues.length; i++) {
|
||||||
|
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
|
||||||
|
}
|
||||||
|
if (matching)
|
||||||
|
return primaryKey;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
|
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
|
||||||
*
|
*
|
||||||
|
|
@ -230,6 +312,17 @@ public class GATKReportTable {
|
||||||
return columns.get(columnName).get(primaryKey);
|
return columns.get(columnName).get(primaryKey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a value from the given position in the table
|
||||||
|
*
|
||||||
|
* @param primaryKey the primary key value
|
||||||
|
* @param columnIndex the index of the column
|
||||||
|
* @return the value stored at the specified position in the table
|
||||||
|
*/
|
||||||
|
private Object get(Object primaryKey, int columnIndex) {
|
||||||
|
return columns.getByIndex(columnIndex).get(primaryKey);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Increment an element in the table. This implementation is awful - a functor would probably be better.
|
* Increment an element in the table. This implementation is awful - a functor would probably be better.
|
||||||
*
|
*
|
||||||
|
|
@ -515,7 +608,7 @@ public class GATKReportTable {
|
||||||
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
|
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
|
||||||
|
|
||||||
// Emit the table definition
|
// Emit the table definition
|
||||||
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
|
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
|
||||||
|
|
||||||
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
|
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
|
||||||
boolean needsPadding = false;
|
boolean needsPadding = false;
|
||||||
|
|
@ -545,22 +638,8 @@ public class GATKReportTable {
|
||||||
|
|
||||||
for (String columnName : columns.keySet()) {
|
for (String columnName : columns.keySet()) {
|
||||||
if (columns.get(columnName).isDisplayable()) {
|
if (columns.get(columnName).isDisplayable()) {
|
||||||
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
|
|
||||||
|
|
||||||
if (needsPadding) { out.printf(" "); }
|
if (needsPadding) { out.printf(" "); }
|
||||||
|
String value = columns.get(columnName).getStringValue(primaryKey);
|
||||||
String value = "null";
|
|
||||||
if (obj != null) {
|
|
||||||
if (obj instanceof Float) {
|
|
||||||
value = String.format("%.8f", (Float) obj);
|
|
||||||
} else if (obj instanceof Double) {
|
|
||||||
value = String.format("%.8f", (Double) obj);
|
|
||||||
} else {
|
|
||||||
value = obj.toString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
|
|
||||||
out.printf(columnWidths.get(columnName), value);
|
out.printf(columnWidths.get(columnName), value);
|
||||||
|
|
||||||
needsPadding = true;
|
needsPadding = true;
|
||||||
|
|
@ -577,4 +656,16 @@ public class GATKReportTable {
|
||||||
public int getNumRows() {
|
public int getNumRows() {
|
||||||
return primaryKeyColumn.size();
|
return primaryKeyColumn.size();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getTableName() {
|
||||||
|
return tableName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTableDescription() {
|
||||||
|
return tableDescription;
|
||||||
|
}
|
||||||
|
|
||||||
|
public GATKReportColumns getColumns() {
|
||||||
|
return columns;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,75 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (c) 2011, The Broad Institute
|
|
||||||
*
|
|
||||||
* Permission is hereby granted, free of charge, to any person
|
|
||||||
* obtaining a copy of this software and associated documentation
|
|
||||||
* files (the "Software"), to deal in the Software without
|
|
||||||
* restriction, including without limitation the rights to use,
|
|
||||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
||||||
* copies of the Software, and to permit persons to whom the
|
|
||||||
* Software is furnished to do so, subject to the following
|
|
||||||
* conditions:
|
|
||||||
*
|
|
||||||
* The above copyright notice and this permission notice shall be
|
|
||||||
* included in all copies or substantial portions of the Software.
|
|
||||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
||||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
||||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
||||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
||||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
||||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
||||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
||||||
* OTHER DEALINGS IN THE SOFTWARE.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.report;
|
|
||||||
|
|
||||||
import org.apache.commons.lang.StringUtils;
|
|
||||||
|
|
||||||
import java.util.*;
|
|
||||||
|
|
||||||
/**
 * Incremental, line-oriented parser for one whitespace-delimited report table.
 *
 * Lines are fed one at a time through {@link #parse(String)}:
 * the first line is kept as the description tokens, the second line defines the
 * column headers, and every later line is stored as a data row.
 */
public class GATKReportTableParser {
    /** Number of lines handed to {@link #parse(String)} so far. */
    private int lineNum = 0;

    /** Tokens of the first (description) line; {@code null} until a line has been parsed. */
    private String[] descriptions;

    /** Column header name mapped to its position within a row. */
    private final Map<String, Integer> headers = new HashMap<String, Integer>();

    /** Tokenized data rows, in the order they were parsed. */
    private final List<String[]> values = new ArrayList<String[]>();

    /**
     * Consumes the next line of the table.
     *
     * @param line the raw line; it is split on runs of spaces
     */
    public void parse(String line) {
        lineNum++;
        switch (lineNum) {
            case 1:
                descriptions = parseLine(line);
                // Without this break the description line would also be registered
                // as column headers and stored as a data row.
                break;
            case 2: {
                String[] columnHeaders = parseLine(line);
                for (int i = 0; i < columnHeaders.length; i++)
                    headers.put(columnHeaders[i], i);
                // Without this break the header line would also be stored as a data row.
                break;
            }
            default:
                values.add(parseLine(line));
                break;
        }
    }

    /**
     * @return the second token of the description line
     * @throws NullPointerException if no line has been parsed yet
     * @throws ArrayIndexOutOfBoundsException if the description line has fewer than two tokens
     */
    public String getTableName() {
        return descriptions[1];
    }

    /**
     * Finds the first data row whose tokens starting at position 1 equal {@code key}
     * and returns that row's value for the requested column.
     *
     * @param key    the key tokens, compared against row positions 1..key.length
     * @param column the column header name
     * @return the matching cell, or {@code null} if the column is unknown or no row matches
     */
    public String getValue(String[] key, String column) {
        if (!headers.containsKey(column))
            return null;
        for (String[] row : values)
            if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
                return row[headers.get(column)];
        return null;
    }

    /**
     * Convenience overload taking the key as a single dot-separated string.
     *
     * @param key    the key tokens joined with '.'
     * @param column the column header name
     * @return the matching cell, or {@code null} if the column is unknown or no row matches
     */
    public String getValue(String key, String column) {
        return getValue(key.split("\\."), column);
    }

    /** Splits a line into tokens on runs of one or more spaces. */
    private String[] parseLine(String line) {
        return line.split(" +");
    }
}
|
|
||||||
|
|
@ -0,0 +1,70 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.report;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
|
public enum GATKReportVersion {
|
||||||
|
/**
|
||||||
|
* Differences between other versions:
|
||||||
|
* - Does not allow spaces in cells.
|
||||||
|
* - Mostly fixed width but has a bug where the string width of floating point
|
||||||
|
* values was not measured correctly leading to columns that aren't aligned
|
||||||
|
*/
|
||||||
|
V0_1("v0.1"),
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Differences between other versions:
|
||||||
|
* - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
|
||||||
|
* - Fixed width fixed for floating point values
|
||||||
|
*/
|
||||||
|
V0_2("v0.2");
|
||||||
|
|
||||||
|
public final String versionString;
|
||||||
|
|
||||||
|
private GATKReportVersion(String versionString) {
|
||||||
|
this.versionString = versionString;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return versionString;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the GATK Report Version from the file header.
|
||||||
|
* @param header Header from the file starting with ##:GATKReport.v[version]
|
||||||
|
* @return The version as an enum.
|
||||||
|
*/
|
||||||
|
public static GATKReportVersion fromHeader(String header) {
|
||||||
|
if (header.startsWith("##:GATKReport.v0.1 "))
|
||||||
|
return GATKReportVersion.V0_1;
|
||||||
|
|
||||||
|
if (header.startsWith("##:GATKReport.v0.2 "))
|
||||||
|
return GATKReportVersion.V0_2;
|
||||||
|
|
||||||
|
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
|
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
|
||||||
|
|
||||||
/** We use the SimpleTimer to time our run */
|
/** We use the SimpleTimer to time our run */
|
||||||
private SimpleTimer timer = new SimpleTimer("Traversal");
|
private SimpleTimer timer = null;
|
||||||
|
|
||||||
// How long can we go without printing some progress info?
|
// How long can we go without printing some progress info?
|
||||||
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
||||||
private int printProgressCheckCounter = 0;
|
private int printProgressCheckCounter = 0;
|
||||||
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
||||||
|
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
|
||||||
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||||
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
||||||
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
||||||
|
|
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/**
|
/**
|
||||||
* Should be called to indicate that we're going to process records and the timer should start ticking
|
* Should be called to indicate that we're going to process records and the timer should start ticking. This
|
||||||
|
* function should be called right before any traversal work is done, to avoid counting setup costs in the
|
||||||
|
* processing costs and inflating the estimated runtime.
|
||||||
*/
|
*/
|
||||||
public void startTimers() {
|
public void startTimersIfNecessary() {
|
||||||
timer.start();
|
if ( timer == null ) {
|
||||||
lastProgressPrintTime = timer.currentTime();
|
timer = new SimpleTimer("Traversal");
|
||||||
|
timer.start();
|
||||||
|
lastProgressPrintTime = timer.currentTime();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
||||||
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
||||||
*/
|
*/
|
||||||
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
|
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
|
||||||
return (curTime - lastPrintTime) > printFreq;
|
long elapsed = curTime - lastPrintTime;
|
||||||
|
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
|
|
@ -173,7 +173,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
* -> those with the same mate pair position, for paired reads
|
* -> those with the same mate pair position, for paired reads
|
||||||
* -> those flagged as unpaired and duplicated but having the same start and end
|
* -> those flagged as unpaired and duplicated but having the same start and end
|
||||||
*/
|
*/
|
||||||
|
boolean done = walker.isDone();
|
||||||
for (SAMRecord read : iter) {
|
for (SAMRecord read : iter) {
|
||||||
|
if ( done ) break;
|
||||||
// get the genome loc from the read
|
// get the genome loc from the read
|
||||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||||
|
|
||||||
|
|
@ -194,6 +196,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
||||||
}
|
}
|
||||||
|
|
||||||
printProgress(dataProvider.getShard(),site);
|
printProgress(dataProvider.getShard(),site);
|
||||||
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
return sum;
|
return sum;
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
||||||
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
||||||
|
|
||||||
LocusView locusView = getLocusView( walker, dataProvider );
|
LocusView locusView = getLocusView( walker, dataProvider );
|
||||||
|
boolean done = false;
|
||||||
|
|
||||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||||
|
|
||||||
|
|
@ -46,7 +47,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
||||||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||||
|
|
||||||
// We keep processing while the next reference location is within the interval
|
// We keep processing while the next reference location is within the interval
|
||||||
while( locusView.hasNext() ) {
|
while( locusView.hasNext() && ! done ) {
|
||||||
AlignmentContext locus = locusView.next();
|
AlignmentContext locus = locusView.next();
|
||||||
GenomeLoc location = locus.getLocation();
|
GenomeLoc location = locus.getLocation();
|
||||||
|
|
||||||
|
|
@ -65,26 +66,28 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
||||||
referenceView.expandBoundsToAccomodateLoc(location);
|
referenceView.expandBoundsToAccomodateLoc(location);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterate forward to get all reference ordered data covering this location
|
|
||||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
|
||||||
|
|
||||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||||
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||||
|
|
||||||
|
// Iterate forward to get all reference ordered data covering this location
|
||||||
|
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||||
|
|
||||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||||
if (keepMeP) {
|
if (keepMeP) {
|
||||||
M x = walker.map(tracker, refContext, locus);
|
M x = walker.map(tracker, refContext, locus);
|
||||||
sum = walker.reduce(x, sum);
|
sum = walker.reduce(x, sum);
|
||||||
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
printProgress(dataProvider.getShard(),locus.getLocation());
|
printProgress(dataProvider.getShard(),locus.getLocation());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// We have a final map call to execute here to clean up the skipped based from the
|
// We have a final map call to execute here to clean up the skipped based from the
|
||||||
// last position in the ROD to that in the interval
|
// last position in the ROD to that in the interval
|
||||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
|
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
|
||||||
|
// only do this if the walker isn't done!
|
||||||
RodLocusView rodLocusView = (RodLocusView)locusView;
|
RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||||
long nSkipped = rodLocusView.getLastSkippedBases();
|
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||||
if ( nSkipped > 0 ) {
|
if ( nSkipped > 0 ) {
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,9 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
||||||
ReadView reads = new ReadView(dataProvider);
|
ReadView reads = new ReadView(dataProvider);
|
||||||
List<SAMRecord> pairs = new ArrayList<SAMRecord>();
|
List<SAMRecord> pairs = new ArrayList<SAMRecord>();
|
||||||
|
|
||||||
|
boolean done = walker.isDone();
|
||||||
for(SAMRecord read: reads) {
|
for(SAMRecord read: reads) {
|
||||||
|
if ( done ) break;
|
||||||
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
|
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
|
||||||
|
|
||||||
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
|
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
|
||||||
|
|
@ -65,6 +67,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
||||||
|
|
||||||
printProgress(dataProvider.getShard(),null);
|
printProgress(dataProvider.getShard(),null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
|
|
||||||
// If any data was left in the queue, process it.
|
// If any data was left in the queue, process it.
|
||||||
|
|
|
||||||
|
|
@ -82,8 +82,10 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
||||||
// get the reference ordered data
|
// get the reference ordered data
|
||||||
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||||
|
|
||||||
|
boolean done = walker.isDone();
|
||||||
// while we still have more reads
|
// while we still have more reads
|
||||||
for (SAMRecord read : reads) {
|
for (SAMRecord read : reads) {
|
||||||
|
if ( done ) break;
|
||||||
// ReferenceContext -- the reference bases covered by the read
|
// ReferenceContext -- the reference bases covered by the read
|
||||||
ReferenceContext refContext = null;
|
ReferenceContext refContext = null;
|
||||||
|
|
||||||
|
|
@ -106,6 +108,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
||||||
|
|
||||||
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
|
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
|
||||||
printProgress(dataProvider.getShard(),locus);
|
printProgress(dataProvider.getShard(),locus);
|
||||||
|
done = walker.isDone();
|
||||||
}
|
}
|
||||||
return sum;
|
return sum;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,5 +23,4 @@ import java.lang.annotation.*;
|
||||||
@Target(ElementType.TYPE)
|
@Target(ElementType.TYPE)
|
||||||
public @interface Allows {
|
public @interface Allows {
|
||||||
DataSource[] value();
|
DataSource[] value();
|
||||||
RMD[] referenceMetaData() default {};
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
||||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import net.sf.samtools.util.StringUtil;
|
import net.sf.samtools.util.StringUtil;
|
||||||
|
import org.broadinstitute.sting.commandline.Advanced;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Hidden;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||||
|
|
@ -50,44 +52,158 @@ import java.util.regex.Matcher;
|
||||||
import java.util.regex.Pattern;
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads
|
* This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.
|
||||||
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
|
*
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* It allows the user to clip bases in reads with poor quality scores, that match particular
|
||||||
|
* sequences, or that were generated by particular machine cycles.
|
||||||
|
*
|
||||||
|
* <dl>
|
||||||
|
* <dt>Quality score based clipping</dt>
|
||||||
|
* <dd>
|
||||||
|
* Clip bases from the read in clipper from
|
||||||
|
* <br>argmax_x{ \sum_{i = x + 1}^l (qTrimmingThreshold - qual_i) }</br>
|
||||||
|
* to the end of the read. This is blatantly stolen from BWA.
|
||||||
|
*
|
||||||
|
* Walk through the read from the end (in machine cycle order) to the beginning, calculating the
|
||||||
|
* running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this
|
||||||
|
* sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the
|
||||||
|
* clipping index in the read (from the end).
|
||||||
|
* </dd>
|
||||||
|
* <dt>Cycle based clipping</dt>
|
||||||
|
* <dd>Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
|
||||||
|
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions).
|
||||||
|
* For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12.
|
||||||
|
* </dd>
|
||||||
|
* <dt>Sequence matching</dt>
|
||||||
|
* <dd>Clips bases that exactly match one of a number of base sequences. This employs an exact match algorithm,
|
||||||
|
* filtering only bases whose sequence exactly matches SEQ.</dd>
|
||||||
|
* </dl>
|
||||||
|
*
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* Any number of BAM files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* A new BAM file containing all of the reads from the input BAMs with the user-specified clipping
|
||||||
|
* operation applied to each read.
|
||||||
|
* </p>
|
||||||
|
* <p>
|
||||||
|
* <h3>Summary output</h3>
|
||||||
|
* <pre>
|
||||||
|
* Number of examined reads 13
|
||||||
|
* Number of clipped reads 13
|
||||||
|
* Percent of clipped reads 100.00
|
||||||
|
* Number of examined bases 988
|
||||||
|
* Number of clipped bases 126
|
||||||
|
* Percent of clipped bases 12.75
|
||||||
|
* Number of quality-score clipped bases 126
|
||||||
|
* Number of range clipped bases 0
|
||||||
|
* Number of sequence clipped bases 0
|
||||||
|
* </pre>
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* <h3>Example clipping</h3>
|
||||||
|
* Suppose we are given this read:
|
||||||
|
* <pre>
|
||||||
|
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||||
|
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||||
|
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* If we are clipping reads with -QT 10 and -CR WRITE_NS, we get:
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||||
|
* NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||||
|
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* Whereas with -CR WRITE_Q0S:
|
||||||
|
* <pre>
|
||||||
|
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||||
|
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||||
|
* !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* Or -CR SOFTCLIP_BASES:
|
||||||
|
* <pre>
|
||||||
|
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3133 29 17S59M * * *
|
||||||
|
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||||
|
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||||
|
* </pre>
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
|
||||||
|
* -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
|
||||||
|
* </pre>
|
||||||
|
|
||||||
|
* @author Mark DePristo
|
||||||
|
* @since 2010
|
||||||
*/
|
*/
|
||||||
@Requires({DataSource.READS})
|
@Requires({DataSource.READS})
|
||||||
public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.ClippingData> {
|
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
|
||||||
@Output
|
/**
|
||||||
PrintStream out;
|
* If provided, ClipReads will write summary statistics about the clipping operations applied
|
||||||
|
* to the reads to this file.
|
||||||
|
*/
|
||||||
|
@Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false)
|
||||||
|
PrintStream out = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* an optional argument to dump the reads out to a BAM file
|
* The output SAM/BAM file will be written here
|
||||||
*/
|
*/
|
||||||
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
@Output(doc = "Write BAM output here", required = true)
|
||||||
StingSAMFileWriter outputBam = null;
|
StingSAMFileWriter outputBam;
|
||||||
|
|
||||||
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
|
/**
|
||||||
|
* If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this
|
||||||
|
* quality score threshold.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false)
|
||||||
int qTrimmingThreshold = -1;
|
int qTrimmingThreshold = -1;
|
||||||
|
|
||||||
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
|
/**
|
||||||
|
* Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
|
||||||
|
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based
|
||||||
|
* values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11,
|
||||||
|
* and 12.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false)
|
||||||
String cyclesToClipArg = null;
|
String cyclesToClipArg = null;
|
||||||
|
|
||||||
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false)
|
/**
|
||||||
|
* Reads the sequences in the provided FASTA file, and clips any bases that exactly match any of the
|
||||||
|
* sequences in the file.
|
||||||
|
*/
|
||||||
|
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false)
|
||||||
String clipSequenceFile = null;
|
String clipSequenceFile = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Clips bases from the reads matching the provided SEQ. Can be provided any number of times on the command line
|
||||||
|
*/
|
||||||
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
|
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
|
||||||
String[] clipSequencesArgs = null;
|
String[] clipSequencesArgs = null;
|
||||||
|
|
||||||
@Argument(fullName="read", doc="", required=false)
|
/**
|
||||||
String onlyDoRead = null;
|
* The different values for this argument determine how ClipReads applies clips to the reads. This can range
|
||||||
|
* from writing Ns over the clipped bases to hard clipping away the bases from the BAM.
|
||||||
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
|
*/
|
||||||
//boolean keepCompletelyClippedReads = false;
|
|
||||||
|
|
||||||
// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
|
|
||||||
// boolean onlyClipFirstSeqMatch = false;
|
|
||||||
|
|
||||||
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
||||||
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
||||||
|
|
||||||
|
@Hidden
|
||||||
|
@Advanced
|
||||||
|
@Argument(fullName="read", doc="", required=false)
|
||||||
|
String onlyDoRead = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* List of sequence that should be clipped from the reads
|
* List of sequence that should be clipped from the reads
|
||||||
|
|
@ -180,12 +296,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
* @param read the read itself, as a SAMRecord
|
* @param read the read itself, as a SAMRecord
|
||||||
* @return the ReadClipper object describing what should be done to clip this read
|
* @return the ReadClipper object describing what should be done to clip this read
|
||||||
*/
|
*/
|
||||||
public ReadClipper map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||||
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
||||||
}
|
}
|
||||||
ReadClipper clipper = new ReadClipper(read);
|
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
|
||||||
|
|
||||||
//
|
//
|
||||||
// run all three clipping modules
|
// run all three clipping modules
|
||||||
|
|
@ -205,9 +321,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipSequences(ReadClipper clipper) {
|
private void clipSequences(ReadClipperWithData clipper) {
|
||||||
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
|
|
||||||
for (SeqToClip stc : sequencesToClip) {
|
for (SeqToClip stc : sequencesToClip) {
|
||||||
// we have a pattern for both the forward and the reverse strands
|
// we have a pattern for both the forward and the reverse strands
|
||||||
|
|
@ -223,11 +340,14 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
if (found) {
|
if (found) {
|
||||||
int start = match.start();
|
int start = match.start();
|
||||||
int stop = match.end() - 1;
|
int stop = match.end() - 1;
|
||||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
clipper.addOp(op);
|
clipper.addOp(op);
|
||||||
|
data.incSeqClippedBases(stc.seq, op.getLength());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -252,9 +372,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipCycles(ReadClipper clipper) {
|
private void clipCycles(ReadClipperWithData clipper) {
|
||||||
if (cyclesToClip != null) {
|
if (cyclesToClip != null) {
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
|
|
||||||
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
||||||
int cycleStart = p.first;
|
int cycleStart = p.first;
|
||||||
|
|
@ -270,10 +391,13 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
int start = startStop.first;
|
int start = startStop.first;
|
||||||
int stop = startStop.second;
|
int stop = startStop.second;
|
||||||
|
|
||||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
clipper.addOp(op);
|
clipper.addOp(op);
|
||||||
|
data.incNRangeClippedBases(op.getLength());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -291,8 +415,9 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
*
|
*
|
||||||
* @param clipper
|
* @param clipper
|
||||||
*/
|
*/
|
||||||
private void clipBadQualityScores(ReadClipper clipper) {
|
private void clipBadQualityScores(ReadClipperWithData clipper) {
|
||||||
SAMRecord read = clipper.getRead();
|
SAMRecord read = clipper.getRead();
|
||||||
|
ClippingData data = clipper.getData();
|
||||||
int readLen = read.getReadBases().length;
|
int readLen = read.getReadBases().length;
|
||||||
byte[] quals = read.getBaseQualities();
|
byte[] quals = read.getBaseQualities();
|
||||||
|
|
||||||
|
|
@ -311,8 +436,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
if (clipPoint != -1) {
|
if (clipPoint != -1) {
|
||||||
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
|
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
|
||||||
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
|
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
|
||||||
clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
//clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
||||||
|
ClippingOp op = new ClippingOp(start, stop);
|
||||||
|
clipper.addOp(op);
|
||||||
|
data.incNQClippedBases(op.getLength());
|
||||||
}
|
}
|
||||||
|
clipper.setData(data);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -325,7 +454,7 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
return new ClippingData(sequencesToClip);
|
return new ClippingData(sequencesToClip);
|
||||||
}
|
}
|
||||||
|
|
||||||
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
|
public ClippingData reduce(ReadClipperWithData clipper, ClippingData data) {
|
||||||
if ( clipper == null )
|
if ( clipper == null )
|
||||||
return data;
|
return data;
|
||||||
|
|
||||||
|
|
@ -340,23 +469,8 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
data.nTotalBases += clipper.getRead().getReadLength();
|
data.nTotalBases += clipper.getRead().getReadLength();
|
||||||
if (clipper.wasClipped()) {
|
if (clipper.wasClipped()) {
|
||||||
data.nClippedReads++;
|
data.nClippedReads++;
|
||||||
for (ClippingOp op : clipper.getOps()) {
|
data.addData(clipper.getData());
|
||||||
switch (op.type) {
|
|
||||||
case LOW_Q_SCORES:
|
|
||||||
data.incNQClippedBases(op.getLength());
|
|
||||||
break;
|
|
||||||
case WITHIN_CLIP_RANGE:
|
|
||||||
data.incNRangeClippedBases(op.getLength());
|
|
||||||
break;
|
|
||||||
case MATCHES_CLIP_SEQ:
|
|
||||||
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new IllegalStateException("Unexpected Clipping operator type " + op);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -417,6 +531,23 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
|
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void addData (ClippingData data) {
|
||||||
|
nTotalReads += data.nTotalReads;
|
||||||
|
nTotalBases += data.nTotalBases;
|
||||||
|
nClippedReads += data.nClippedReads;
|
||||||
|
nClippedBases += data.nClippedBases;
|
||||||
|
nQClippedBases += data.nQClippedBases;
|
||||||
|
nRangeClippedBases += data.nRangeClippedBases;
|
||||||
|
nSeqClippedBases += data.nSeqClippedBases;
|
||||||
|
|
||||||
|
for (String seqClip : data.seqClipCounts.keySet()) {
|
||||||
|
Long count = data.seqClipCounts.get(seqClip);
|
||||||
|
if (seqClipCounts.containsKey(seqClip))
|
||||||
|
count += seqClipCounts.get(seqClip);
|
||||||
|
seqClipCounts.put(seqClip, count);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder s = new StringBuilder();
|
StringBuilder s = new StringBuilder();
|
||||||
|
|
||||||
|
|
@ -439,4 +570,27 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public class ReadClipperWithData extends ReadClipper {
|
||||||
|
private ClippingData data;
|
||||||
|
|
||||||
|
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
|
||||||
|
super(read);
|
||||||
|
data = new ClippingData(clipSeqs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public ClippingData getData() {
|
||||||
|
return data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setData(ClippingData data) {
|
||||||
|
this.data = data;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addData(ClippingData data) {
|
||||||
|
this.data.addData(data);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
|
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
|
|
||||||
|
|
@ -17,7 +17,7 @@ import java.util.Set;
|
||||||
* To change this template use File | Settings | File Templates.
|
* To change this template use File | Settings | File Templates.
|
||||||
*/
|
*/
|
||||||
@Requires({DataSource.READS,DataSource.REFERENCE})
|
@Requires({DataSource.READS,DataSource.REFERENCE})
|
||||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
|
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
|
||||||
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
|
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter;
|
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
|
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
|
||||||
|
|
@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
@By(DataSource.READS)
|
@By(DataSource.READS)
|
||||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||||
@PartitionBy(PartitionType.INTERVAL)
|
@PartitionBy(PartitionType.INTERVAL)
|
||||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
|
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||||
// Do we actually want to operate on the context?
|
// Do we actually want to operate on the context?
|
||||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
|
|
||||||
|
|
@ -25,15 +25,14 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.commandline.Argument;
|
import org.broadinstitute.sting.commandline.Argument;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||||
|
|
@ -41,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -68,6 +68,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
||||||
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
||||||
public boolean SHOW_INDEL_PILEUPS = false;
|
public boolean SHOW_INDEL_PILEUPS = false;
|
||||||
|
|
||||||
|
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
|
||||||
|
public List<RodBinding<Feature>> rods = Collections.emptyList();
|
||||||
|
|
||||||
public void initialize() {
|
public void initialize() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -112,18 +115,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
||||||
*/
|
*/
|
||||||
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
||||||
ArrayList<String> rodStrings = new ArrayList<String>();
|
ArrayList<String> rodStrings = new ArrayList<String>();
|
||||||
for ( GATKFeature datum : tracker.getAllRods() ) {
|
for ( Feature datum : tracker.getValues(rods) ) {
|
||||||
if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) {
|
rodStrings.add(datum.toString());
|
||||||
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
String rodString = Utils.join(", ", rodStrings);
|
String rodString = Utils.join(", ", rodStrings);
|
||||||
|
|
||||||
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
|
|
||||||
|
|
||||||
if ( dbsnp != null)
|
|
||||||
rodString += DbSNPHelper.toMediumString(dbsnp);
|
|
||||||
|
|
||||||
if ( !rodString.equals("") )
|
if ( !rodString.equals("") )
|
||||||
rodString = "[ROD: " + rodString + "]";
|
rodString = "[ROD: " + rodString + "]";
|
||||||
|
|
||||||
|
|
@ -132,8 +128,6 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onTraversalDone(Integer result) {
|
public void onTraversalDone(Integer result) {
|
||||||
// Double check traversal result to make count is the same.
|
|
||||||
// TODO: Is this check necessary?
|
|
||||||
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,21 +25,24 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broadinstitute.sting.commandline.Input;
|
||||||
import org.broadinstitute.sting.commandline.Output;
|
import org.broadinstitute.sting.commandline.Output;
|
||||||
|
import org.broadinstitute.sting.commandline.RodBinding;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
|
||||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
|
||||||
|
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
import java.util.Iterator;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
||||||
* of the given ROD.
|
* of the given ROD.
|
||||||
*/
|
*/
|
||||||
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
||||||
|
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
|
||||||
|
public RodBinding<Feature> input;
|
||||||
|
|
||||||
@Output
|
@Output
|
||||||
PrintStream out;
|
PrintStream out;
|
||||||
|
|
||||||
|
|
@ -61,11 +64,8 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
||||||
if ( tracker == null )
|
if ( tracker == null )
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
|
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
|
||||||
while ( rods.hasNext() ) {
|
out.println(feature.toString());
|
||||||
Object rod = rods.next().getUnderlyingObject();
|
|
||||||
if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) )
|
|
||||||
out.println(rod.toString());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
|
|
|
||||||
|
|
@ -40,26 +40,72 @@ import java.util.TreeSet;
|
||||||
|
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
|
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||||
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
|
*
|
||||||
* in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter
|
* <p>
|
||||||
* command line argument.
|
* PrintReads can dynamically merge the contents of multiple input BAM files, resulting
|
||||||
|
* in merged output sorted in coordinate order. Can also optionally filter reads based on the
|
||||||
|
* --read_filter command line argument.
|
||||||
|
*
|
||||||
|
* <h2>Input</h2>
|
||||||
|
* <p>
|
||||||
|
* One or more bam files.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Output</h2>
|
||||||
|
* <p>
|
||||||
|
* A single processed bam file.
|
||||||
|
* </p>
|
||||||
|
*
|
||||||
|
* <h2>Examples</h2>
|
||||||
|
* <pre>
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T PrintReads \
|
||||||
|
* -o output.bam \
|
||||||
|
* -I input1.bam \
|
||||||
|
* -I input2.bam \
|
||||||
|
* --read_filter MappingQualityZero
|
||||||
|
*
|
||||||
|
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||||
|
* -R ref.fasta \
|
||||||
|
* -T PrintReads \
|
||||||
|
* -o output.bam \
|
||||||
|
* -I input.bam \
|
||||||
|
* -n 2000
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
*/
|
*/
|
||||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||||
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||||
/** an optional argument to dump the reads out to a BAM file */
|
|
||||||
@Output(doc="Write output to this BAM filename instead of STDOUT")
|
@Output(doc="Write output to this BAM filename instead of STDOUT")
|
||||||
SAMFileWriter out;
|
SAMFileWriter out;
|
||||||
|
|
||||||
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
|
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
|
||||||
String readGroup = null;
|
String readGroup = null;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* For example, --platform ILLUMINA or --platform 454.
|
||||||
|
*/
|
||||||
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
||||||
String platform = null; // E.g. ILLUMINA, 454
|
String platform = null;
|
||||||
|
|
||||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||||
int nReadsToPrint = -1;
|
int nReadsToPrint = -1;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only reads from samples listed in the provided file(s) will be included in the output.
|
||||||
|
*/
|
||||||
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
|
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
|
||||||
public Set<File> sampleFile = new TreeSet<File>();
|
public Set<File> sampleFile = new TreeSet<File>();
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Only reads from the sample(s) will be included in the output.
|
||||||
|
*/
|
||||||
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
||||||
public Set<String> sampleNames = new TreeSet<String>();
|
public Set<String> sampleNames = new TreeSet<String>();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import net.sf.picard.filter.SamRecordFilter;
|
import net.sf.picard.filter.SamRecordFilter;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
|
||||||
import java.lang.annotation.*;
|
import java.lang.annotation.*;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -26,11 +26,14 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||||
import org.broadinstitute.sting.utils.collections.Pair;
|
import org.broadinstitute.sting.utils.collections.Pair;
|
||||||
|
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||||
|
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
|
@ -44,6 +47,10 @@ import java.util.List;
|
||||||
@ReadFilters(MalformedReadFilter.class)
|
@ReadFilters(MalformedReadFilter.class)
|
||||||
@PartitionBy(PartitionType.NONE)
|
@PartitionBy(PartitionType.NONE)
|
||||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||||
|
@DocumentedGATKFeature(
|
||||||
|
groupName = "GATK walkers",
|
||||||
|
summary = "General tools available for running on the command line as part of the GATK package",
|
||||||
|
extraDocs = {CommandLineGATK.class})
|
||||||
public abstract class Walker<MapType, ReduceType> {
|
public abstract class Walker<MapType, ReduceType> {
|
||||||
final protected static Logger logger = Logger.getLogger(Walker.class);
|
final protected static Logger logger = Logger.getLogger(Walker.class);
|
||||||
private GenomeAnalysisEngine toolkit;
|
private GenomeAnalysisEngine toolkit;
|
||||||
|
|
@ -119,6 +126,17 @@ public abstract class Walker<MapType, ReduceType> {
|
||||||
|
|
||||||
public void initialize() { }
|
public void initialize() { }
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A function for overloading in subclasses providing a mechanism to abort early from a walker.
|
||||||
|
*
|
||||||
|
* If this ever returns true, then the Traversal engine will stop executing map calls
|
||||||
|
* and start the process of shutting down the walker in an orderly fashion.
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public boolean isDone() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Provide an initial value for reduce computations.
|
* Provide an initial value for reduce computations.
|
||||||
* @return Initial value of reduce.
|
* @return Initial value of reduce.
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
|
@ -42,9 +43,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class AlleleBalance implements InfoFieldAnnotation {
|
public class AlleleBalance extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
@ -89,7 +90,7 @@ public class AlleleBalance implements InfoFieldAnnotation {
|
||||||
}
|
}
|
||||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||||
int refCount = indelPileup.size();
|
int refCount = indelPileup.size();
|
||||||
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||||
|
|
||||||
if ( refCount + altCount == 0 ) {
|
if ( refCount + altCount == 0 ) {
|
||||||
continue;
|
continue;
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -15,9 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation {
|
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||||
if (ratio == null)
|
if (ratio == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@ import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
|
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||||
|
|
||||||
|
|
||||||
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {
|
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {
|
||||||
|
|
|
||||||
|
|
@ -34,6 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
|
@ -46,9 +47,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class BaseCounts implements InfoFieldAnnotation {
|
public class BaseCounts extends InfoFieldAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
@ -43,14 +44,14 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( ! vc.hasGenotypes() )
|
if ( ! vc.hasGenotypes() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||||
|
|
@ -16,9 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
|
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( stratifiedContexts.size() == 0 )
|
if ( stratifiedContexts.size() == 0 )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||||
|
|
@ -22,13 +23,13 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation {
|
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static String REF_ALLELE = "REF";
|
private static String REF_ALLELE = "REF";
|
||||||
|
|
||||||
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||||
if ( g == null || !g.isCalled() )
|
if ( g == null || !g.isCalled() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
|
@ -42,11 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
||||||
public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation {
|
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private static final String FS = "FS";
|
private static final String FS = "FS";
|
||||||
private static final double MIN_PVALUE = 1E-320;
|
private static final double MIN_PVALUE = 1E-320;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if ( ! vc.isVariant() || vc.isFiltered() )
|
if ( ! vc.isVariant() || vc.isFiltered() )
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.utils.BaseUtils;
|
import org.broadinstitute.sting.utils.BaseUtils;
|
||||||
|
|
@ -16,9 +17,9 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
|
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
double content = computeGCContent(ref);
|
double content = computeGCContent(ref);
|
||||||
Map<String, Object> map = new HashMap<String, Object>();
|
Map<String, Object> map = new HashMap<String, Object>();
|
||||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.utils.MathUtils;
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
|
@ -23,11 +24,11 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// A set of annotations calculated directly from the GLs
|
// A set of annotations calculated directly from the GLs
|
||||||
public class GLstats implements InfoFieldAnnotation, StandardAnnotation {
|
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
||||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||||
|
|
|
||||||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||||
|
|
@ -48,13 +49,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
private final static boolean DEBUG = false;
|
private final static boolean DEBUG = false;
|
||||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||||
private final static char REGEXP_WILDCARD = '.';
|
private final static char REGEXP_WILDCARD = '.';
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
||||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||||
import org.broadinstitute.sting.utils.QualityUtils;
|
import org.broadinstitute.sting.utils.QualityUtils;
|
||||||
|
|
@ -18,13 +19,13 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
|
||||||
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
|
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
||||||
|
|
||||||
private static final int MIN_SAMPLES = 10;
|
private static final int MIN_SAMPLES = 10;
|
||||||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||||
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||||
|
|
||||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||||
|
|
||||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||||
|
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue