Merge remote-tracking branch 'unstable/master'
This commit is contained in:
commit
3cd9f3fe81
477
build.xml
477
build.xml
|
|
@ -43,16 +43,17 @@
|
|||
<property name="scala.classes" value="${build.dir}/scala/classes" />
|
||||
|
||||
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
|
||||
|
||||
<property name="javadoc.dir" value="javadoc" />
|
||||
<property name="scaladoc.dir" value="scaladoc" />
|
||||
|
||||
<!-- Contracts for Java -->
|
||||
<!-- To disable, run with -Duse.contracts=false -->
|
||||
<property name="use.contracts" value="true" />
|
||||
<!-- By default, enabled only for test targets -->
|
||||
<!-- To disable for test targets, run with -Duse.contracts=false -->
|
||||
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
|
||||
<property name="java.contracts" value="${build.dir}/java/contracts" />
|
||||
<property name="contracts.version" value="1.0-20110609" />
|
||||
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
|
||||
|
||||
<!-- where to find the tribble distro -->
|
||||
<property name="tribble.dir" value="tribble" />
|
||||
|
||||
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
|
||||
<property name="findbugs.home" value="./findbugs"/>
|
||||
|
|
@ -79,6 +80,16 @@
|
|||
<patternset refid="java.source.pattern" />
|
||||
</fileset>
|
||||
|
||||
<!-- terrible hack to get gatkdocs to see all files -->
|
||||
<patternset id="all.java.source.pattern">
|
||||
<include name="${java.public.source.dir}/**/*.java" />
|
||||
<include name="${java.private.source.dir}/**/*.java" />
|
||||
</patternset>
|
||||
|
||||
<fileset id="all.java.source.files" dir="${basedir}">
|
||||
<patternset refid="all.java.source.pattern" />
|
||||
</fileset>
|
||||
|
||||
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
|
||||
<include name="**/*.java" />
|
||||
</fileset>
|
||||
|
|
@ -160,6 +171,16 @@
|
|||
<property name="scala.target" value="core"/>
|
||||
</target>
|
||||
|
||||
<target name="init.buildpublic">
|
||||
<!-- Set the properties needed to build public only -->
|
||||
<property name="gatk.target" value="core"/>
|
||||
<property name="scala.target" value="core"/>
|
||||
</target>
|
||||
|
||||
<target name="init.usecontracts">
|
||||
<property name="use.contracts" value="true" />
|
||||
</target>
|
||||
|
||||
<target name="git.describe">
|
||||
<exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
|
||||
<arg line="describe" />
|
||||
|
|
@ -234,6 +255,7 @@
|
|||
|
||||
<!-- Create the build directory structure used by compile -->
|
||||
<mkdir dir="${build.dir}"/>
|
||||
<mkdir dir="${lib.dir}"/>
|
||||
<mkdir dir="${java.classes}"/>
|
||||
<mkdir dir="${java.contracts}"/>
|
||||
|
||||
|
|
@ -265,7 +287,7 @@
|
|||
</taskdef>
|
||||
</target>
|
||||
|
||||
<target name="gatk.compile.public.source" depends="tribble,init,resolve">
|
||||
<target name="gatk.compile.public.source" depends="init,resolve">
|
||||
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
|
||||
<compilerarg value="-proc:none"/>
|
||||
</javac>
|
||||
|
|
@ -326,7 +348,7 @@
|
|||
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
|
||||
description="create GATK contracts" if="include.contracts" />
|
||||
|
||||
<target name="gatk.compile" depends="tribble,init,resolve,gatk.compile.source,gatk.contracts" />
|
||||
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
|
||||
|
||||
<target name="init.queue-extensions.generate" depends="gatk.compile">
|
||||
<condition property="uptodate.queue-extensions.generate">
|
||||
|
|
@ -457,6 +479,35 @@
|
|||
</javadoc>
|
||||
</target>
|
||||
|
||||
<target name="clean.gatkdocs">
|
||||
<delete dir="gatkdocs"/>
|
||||
</target>
|
||||
|
||||
<target name="gatkdocs" depends="gatk.compile"
|
||||
description="Extract help key/value pair file from the JavaDoc tags.">
|
||||
<path id="doclet.classpath">
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
</path>
|
||||
|
||||
<!-- Run with -Dgatkdocs.include.hidden=true to include documentation for hidden features -->
|
||||
<condition property="gatkdocs.include.hidden.arg" value="-include-hidden" else="">
|
||||
<isset property="gatkdocs.include.hidden" />
|
||||
</condition>
|
||||
|
||||
<javadoc doclet="org.broadinstitute.sting.utils.help.GATKDoclet"
|
||||
docletpathref="doclet.classpath"
|
||||
classpathref="external.dependencies"
|
||||
classpath="${java.classes}"
|
||||
additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp "${build.timestamp}" -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> <!-- -test to only do DocumentationTest walker -->
|
||||
<sourcefiles>
|
||||
<union>
|
||||
<fileset refid="all.java.source.files"/>
|
||||
</union>
|
||||
</sourcefiles>
|
||||
</javadoc>
|
||||
</target>
|
||||
|
||||
<target name="sting.compile" depends="gatk.compile, scala.compile" />
|
||||
|
||||
<target name="init.jar" depends="sting.compile,extracthelp">
|
||||
|
|
@ -490,6 +541,7 @@
|
|||
<include name="**/utils/codecs/**/*.class"/>
|
||||
<include name="**/utils/variantcontext/**/*.class"/>
|
||||
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
|
||||
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
|
||||
</fileset>
|
||||
</jar>
|
||||
</target>
|
||||
|
|
@ -657,54 +709,6 @@
|
|||
</antcall>
|
||||
</target>
|
||||
|
||||
<target name="test.init.compile">
|
||||
<mkdir dir="${java.test.classes}"/>
|
||||
<mkdir dir="${scala.test.classes}"/>
|
||||
<antcall target="resolve">
|
||||
<param name="ivy.conf" value="test"/>
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
|
||||
<echo message="Sting: Compiling test cases!"/>
|
||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||
<src path="${java.public.test.sources}"/>
|
||||
<src path="${java.private.test.sources}"/>
|
||||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}"/>
|
||||
<pathelement location="${java.contracts}"/>
|
||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
||||
</classpath>
|
||||
<compilerarg value="-proc:none"/>
|
||||
<!--
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
-->
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
||||
<echo message="Scala: Compiling test cases!"/>
|
||||
<antcall target="resolve">
|
||||
<param name="ivy.conf" value="test"/>
|
||||
</antcall>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.public.test.sources}" />
|
||||
<src path="${scala.private.test.sources}" />
|
||||
<include name="**/*.scala"/>
|
||||
<classpath>
|
||||
<path refid="scala.dependencies"/>
|
||||
<pathelement location="${scala.test.classes}"/>
|
||||
<pathelement location="${java.test.classes}"/>
|
||||
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
|
||||
</classpath>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="test.compile" depends="test.java.compile,test.scala.compile">
|
||||
</target>
|
||||
|
||||
<!-- new scala target -->
|
||||
|
||||
<target name="scala" description="build the scala directory">
|
||||
|
|
@ -718,20 +722,113 @@
|
|||
<!-- ***************************************************************************** -->
|
||||
<!-- where to put reports and tests-->
|
||||
<property name="report" value="${build.dir}/report"/>
|
||||
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
||||
<property name="test.output" value="${dist.dir}/test"/>
|
||||
<property name="java.public.test.sources" value="public/java/test"/>
|
||||
<property name="java.private.test.sources" value="private/java/test"/>
|
||||
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
|
||||
<property name="java.public.test.classes" value="${java.test.classes}/public"/>
|
||||
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
|
||||
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
|
||||
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
|
||||
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
|
||||
<property name="scala.public.test.sources" value="public/scala/test"/>
|
||||
<property name="scala.private.test.sources" value="private/scala/test"/>
|
||||
<property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
|
||||
<property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
|
||||
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
|
||||
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
|
||||
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
|
||||
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
|
||||
<property name="test.maxmemory" value="4g"/>
|
||||
|
||||
<target name="test.init.compile">
|
||||
<mkdir dir="${java.test.classes}"/>
|
||||
<mkdir dir="${scala.test.classes}"/>
|
||||
<antcall target="resolve">
|
||||
<param name="ivy.conf" value="test"/>
|
||||
</antcall>
|
||||
</target>
|
||||
|
||||
<target name="test.java.public.compile" depends="dist,test.init.compile">
|
||||
<mkdir dir="${java.public.test.classes}"/>
|
||||
<echo message="Sting: Compiling public test cases!"/>
|
||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||
<src path="${java.public.test.sources}"/>
|
||||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}"/>
|
||||
<pathelement location="${java.contracts}"/>
|
||||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
<compilerarg value="-proc:none"/>
|
||||
<!--
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
-->
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
|
||||
<mkdir dir="${java.private.test.classes}"/>
|
||||
<echo message="Sting: Compiling private test cases!"/>
|
||||
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
|
||||
<src path="${java.private.test.sources}"/>
|
||||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.public.test.classes}"/>
|
||||
<pathelement location="${java.classes}"/>
|
||||
<pathelement location="${java.contracts}"/>
|
||||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
<compilerarg value="-proc:none"/>
|
||||
<!--
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
-->
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
|
||||
|
||||
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
|
||||
<mkdir dir="${scala.public.test.classes}"/>
|
||||
<echo message="Scala: Compiling public test cases!"/>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.public.test.sources}" />
|
||||
<classpath>
|
||||
<path refid="scala.dependencies"/>
|
||||
<pathelement location="${java.public.test.classes}"/>
|
||||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
|
||||
<mkdir dir="${scala.private.test.classes}"/>
|
||||
<echo message="Scala: Compiling private test cases!"/>
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.private.test.sources}" />
|
||||
<classpath>
|
||||
<path refid="scala.dependencies"/>
|
||||
<pathelement location="${scala.public.test.classes}"/>
|
||||
<pathelement location="${java.public.test.classes}"/>
|
||||
<pathelement location="${java.private.test.classes}"/>
|
||||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
|
||||
|
||||
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
|
||||
|
||||
<!-- TEST -->
|
||||
<macrodef name="run-test">
|
||||
<attribute name="testtype"/>
|
||||
<attribute name="outputdir"/>
|
||||
<attribute name="runfailed"/>
|
||||
|
||||
<sequential>
|
||||
<condition property="run.failed.tests">
|
||||
<equals arg1="@{runfailed}" arg2="true"/>
|
||||
</condition>
|
||||
|
||||
<!-- Get the pipeline run type. Default to dry. -->
|
||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||
|
|
@ -741,10 +838,10 @@
|
|||
<isset property="include.contracts" />
|
||||
</condition>
|
||||
|
||||
<mkdir dir="${report}/@{testtype}"/>
|
||||
<mkdir dir="@{outputdir}"/>
|
||||
<echo message="Sting: Running @{testtype} test cases!"/>
|
||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
||||
<testng outputDir="${report}/@{testtype}"
|
||||
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
|
||||
<testng outputDir="@{outputdir}"
|
||||
haltOnFailure="false" failureProperty="test.failure"
|
||||
verbose="2"
|
||||
workingDir="${basedir}"
|
||||
|
|
@ -762,199 +859,163 @@
|
|||
<pathelement location="${java.classes}" />
|
||||
<pathelement location="${scala.classes}" />
|
||||
<pathelement location="${java.contracts}" />
|
||||
<pathelement location="${java.test.classes}" />
|
||||
<pathelement location="${scala.test.classes}" />
|
||||
<pathelement location="${java.public.test.classes}" />
|
||||
<pathelement location="${java.private.test.classes}" />
|
||||
<pathelement location="${scala.public.test.classes}" />
|
||||
<pathelement location="${scala.private.test.classes}" />
|
||||
</classpath>
|
||||
|
||||
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
|
||||
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
|
||||
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
|
||||
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
|
||||
<include name="**/@{testtype}.class" if="include.private"/>
|
||||
</classfileset>
|
||||
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
|
||||
<include name="**/@{testtype}*.class" if="scala.include"/>
|
||||
</classfileset>
|
||||
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
|
||||
<include name="**/@{testtype}*.class" if="include.scala.private"/>
|
||||
</classfileset>
|
||||
|
||||
<xmlfileset dir="${basedir}">
|
||||
<include name="@{testtype}" if="run.failed.tests"/>
|
||||
</xmlfileset>
|
||||
</testng>
|
||||
|
||||
<!-- generate a report for Bamboo or Hudson to read in -->
|
||||
<junitreport todir="${report}/@{testtype}">
|
||||
<fileset dir="${report}/@{testtype}">
|
||||
<junitreport todir="@{outputdir}">
|
||||
<fileset dir="@{outputdir}">
|
||||
<include name="*/*.xml"/>
|
||||
</fileset>
|
||||
<report format="noframes" todir="${report}/@{testtype}"/>
|
||||
<report format="noframes" todir="@{outputdir}"/>
|
||||
</junitreport>
|
||||
<fail message="test failed" if="test.failure" />
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<!-- FAILED-TEST -->
|
||||
<macrodef name="run-failed-test">
|
||||
<attribute name="xmlfailedtestfile" />
|
||||
<sequential>
|
||||
<!-- Get the pipeline run type. Default to dry. -->
|
||||
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
|
||||
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
|
||||
</condition>
|
||||
|
||||
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
|
||||
<isset property="include.contracts" />
|
||||
</condition>
|
||||
|
||||
<mkdir dir="${report}/failed_rerun" />
|
||||
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
|
||||
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
|
||||
<testng outputDir="${report}/failed_rerun"
|
||||
haltOnFailure="false" failureProperty="test.failure"
|
||||
verbose="2"
|
||||
workingDir="${basedir}"
|
||||
useDefaultListeners="false"
|
||||
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
|
||||
<jvmarg value="-Xmx${test.maxmemory}" />
|
||||
<jvmarg value="-Djava.awt.headless=true" />
|
||||
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
|
||||
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
|
||||
<jvmarg line="${cofoja.jvm.args}"/>
|
||||
<!-- <jvmarg value="-Xdebug"/> -->
|
||||
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
|
||||
<classpath>
|
||||
<path refid="external.dependencies" />
|
||||
<pathelement location="${java.classes}" />
|
||||
<pathelement location="${scala.classes}" />
|
||||
<pathelement location="${java.contracts}" />
|
||||
<pathelement location="${java.test.classes}" />
|
||||
<pathelement location="${scala.test.classes}" />
|
||||
</classpath>
|
||||
|
||||
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
|
||||
</testng>
|
||||
|
||||
<fail message="test failed" if="test.failure" />
|
||||
</sequential>
|
||||
</macrodef>
|
||||
|
||||
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
|
||||
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
|
||||
<target name="alltests">
|
||||
<antcall target="test" inheritAll="false"/>
|
||||
<antcall target="integrationtest" inheritAll="false"/>
|
||||
<antcall target="pipelinetest" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<target name="alltests.public">
|
||||
<antcall target="test.public" inheritAll="false"/>
|
||||
<antcall target="integrationtest.public" inheritAll="false"/>
|
||||
<antcall target="pipelinetest.public" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
|
||||
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
|
||||
<condition property="ttype" value="*UnitTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${ttype}"/>
|
||||
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
|
||||
</target>
|
||||
<target name="integrationtest" depends="test.compile" description="Run integration tests">
|
||||
<target name="test.public" depends="init.buildpublic,test"/>
|
||||
|
||||
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
|
||||
<condition property="itype" value="*IntegrationTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${itype}"/>
|
||||
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
|
||||
</target>
|
||||
<target name="performancetest" depends="test.compile" description="Run performance tests">
|
||||
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
|
||||
|
||||
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
|
||||
<condition property="ptype" value="*PerformanceTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${ptype}"/>
|
||||
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
|
||||
</target>
|
||||
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
|
||||
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
|
||||
|
||||
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${pipetype}"/>
|
||||
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||
</target>
|
||||
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
|
||||
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
|
||||
|
||||
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
|
||||
<property name="pipeline.run" value="run"/>
|
||||
<condition property="pipetype" value="*PipelineTest" else="${single}">
|
||||
<not><isset property="single"/></not>
|
||||
</condition>
|
||||
<run-test testtype="${pipetype}"/>
|
||||
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
|
||||
</target>
|
||||
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
|
||||
|
||||
<target name="failed-test" depends="init.buildall,test.compile">
|
||||
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||
</target>
|
||||
|
||||
<target name="failed-test" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
|
||||
<target name="failed-integration" depends="init.buildall,test.compile">
|
||||
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||
</target>
|
||||
|
||||
<target name="failed-integration" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
|
||||
<target name="failed-performance" depends="init.buildall,test.compile">
|
||||
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||
</target>
|
||||
|
||||
<target name="failed-performance" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
|
||||
<target name="failed-pipeline" depends="init.buildall,test.compile">
|
||||
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
|
||||
</target>
|
||||
|
||||
<target name="failed-pipeline" depends="test.compile">
|
||||
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
|
||||
</target>
|
||||
|
||||
<!-- ***************************************************************************** -->
|
||||
<!-- *********** Tribble ********* -->
|
||||
<!-- ***************************************************************************** -->
|
||||
<target name="tribble.init" description="checks if tribble is available to build from source">
|
||||
<condition property="tribble.compile.exists">
|
||||
<available file="${tribble.dir}/build.xml"/>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
<!-- compile the library -->
|
||||
<target name="tribble.compile" description="compiles the tribble library" depends="tribble.init" if="tribble.compile.exists">
|
||||
<echo message="Building the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="all" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<!-- copy the compiled library -->
|
||||
<target name="tribble.compile.copy" description="Copies the compiled tribble library" depends="tribble.compile" if="tribble.compile.exists">
|
||||
<copy todir="${lib.dir}">
|
||||
<fileset dir="${tribble.dir}/dist" includes="*.jar"/>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
<!-- copy the precompiled library -->
|
||||
<target name="tribble.library.copy" description="Copies the precompiled tribble library" depends="tribble.init" unless="tribble.compile.exists">
|
||||
<echo message="Copying the Tribble Library..."/>
|
||||
<copy todir="${lib.dir}">
|
||||
<fileset dir="settings/repository/org.broad" includes="tribble*.jar"/>
|
||||
</copy>
|
||||
</target>
|
||||
|
||||
<target name="tribble" description="Copies the tribble jar" depends="tribble.compile.copy,tribble.library.copy"/>
|
||||
|
||||
<target name="tribble.test.init" description="runs the tribble tests" depends="tribble.init">
|
||||
<condition property="tribble.test.run">
|
||||
<and>
|
||||
<isset property="tribble.compile.exists"/>
|
||||
<not><isset property="single"/></not>
|
||||
</and>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
<!-- test tribble using the unit tests set in tribble -->
|
||||
<target name="tribble.test" description="runs the tribble tests" depends="tribble.test.init,tribble.compile" if="tribble.test.run">
|
||||
<echo message="Testing the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
|
||||
<!-- clean tribble -->
|
||||
<target name="tribble.clean" description="cleans the tribble library" depends="tribble.init" if="tribble.compile.exists">
|
||||
<echo message="Cleaning the Tribble Library..."/>
|
||||
<ant antfile="build.xml" target="clean" dir="${tribble.dir}" inheritAll="false"/>
|
||||
</target>
|
||||
<!-- ***************************************************************************** -->
|
||||
<!-- ******************************************************************************** -->
|
||||
<!-- Javadoc -->
|
||||
<!-- ******************************************************************************** -->
|
||||
|
||||
<target name="clean.javadoc">
|
||||
<delete dir="javadoc"/>
|
||||
<delete dir="scaladoc"/>
|
||||
<delete dir="${javadoc.dir}" />
|
||||
</target>
|
||||
|
||||
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc">
|
||||
<mkdir dir="javadoc"/>
|
||||
<javadoc destdir="javadoc"
|
||||
classpathref="external.dependencies">
|
||||
<sourcepath path="${java.public.source.dir}"/>
|
||||
<sourcepath path="${external.dir}"/>
|
||||
<target name="init.javadoc">
|
||||
<mkdir dir="${javadoc.dir}" />
|
||||
</target>
|
||||
|
||||
<target name="javadoc" depends="init.buildpublic,generate.javadoc" description="Generates public javadoc" />
|
||||
|
||||
<target name="javadoc.private" depends="init.buildall,generate.javadoc" description="Generates public and private javadoc" />
|
||||
|
||||
<target name="generate.javadoc" depends="init.javadoc,resolve">
|
||||
<javadoc destdir="${javadoc.dir}" classpathref="external.dependencies">
|
||||
<fileset refid="java.source.files" />
|
||||
<sourcepath path="${external.dir}" />
|
||||
</javadoc>
|
||||
<javadoc destdir="javadoc"
|
||||
classpathref="external.dependencies">
|
||||
<sourcepath path="${java.private.source.dir}"/>
|
||||
<exclude name="**" unless="include.private" />
|
||||
</javadoc>
|
||||
<mkdir dir="scaladoc"/>
|
||||
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.public.source.dir}"/>
|
||||
<src path="${scala.private.source.dir}"/>
|
||||
<src path="${queue-extensions.source.dir}"/>
|
||||
<include name="**/*.scala"/>
|
||||
</target>
|
||||
|
||||
<!-- ******************************************************************************** -->
|
||||
<!-- Scaladoc -->
|
||||
<!-- ******************************************************************************** -->
|
||||
|
||||
<target name="clean.scaladoc">
|
||||
<delete dir="${scaladoc.dir}" />
|
||||
</target>
|
||||
|
||||
<target name="init.scaladoc">
|
||||
<mkdir dir="${scaladoc.dir}" />
|
||||
</target>
|
||||
|
||||
<!-- NOTE: the scaladoc targets require that the environment variable ANT_OPTS has been set to "-Xmx1G" -->
|
||||
|
||||
<target name="scaladoc" depends="init.buildpublic,generate.scaladoc" description="Generates public scaladoc -- set ANT_OPTS to -Xmx1G" />
|
||||
|
||||
<target name="scaladoc.private" depends="init.buildall,generate.scaladoc" description="Generates public and private scaladoc -- set ANT_OPTS to -Xmx1G" />
|
||||
|
||||
<target name="generate.scaladoc" depends="resolve,queue-extensions.generate,init.scala.compile,scala.compile,init.scaladoc">
|
||||
<scaladoc srcdir="${basedir}" destdir="${scaladoc.dir}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<include name="${scala.public.source.dir}/**/*.scala" />
|
||||
<include name="${queue-extensions.source.dir}/**/*.scala" />
|
||||
<include name="${scala.private.source.dir}/**/*.scala" if="include.private" />
|
||||
</scaladoc>
|
||||
</target>
|
||||
|
||||
<!-- ******************************************************************************** -->
|
||||
<!-- Release-related tasks -->
|
||||
<!-- ******************************************************************************** -->
|
||||
|
||||
<!-- Unzip all classes from their current locations and assemble them in a staging directory -->
|
||||
<target name="stage" description="stage files for distribution">
|
||||
<mkdir dir="staging"/>
|
||||
|
|
@ -1044,7 +1105,7 @@
|
|||
</findbugs>
|
||||
</target>
|
||||
|
||||
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc">
|
||||
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
|
||||
<delete dir="out"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="${lib.dir}"/>
|
||||
|
|
|
|||
6
ivy.xml
6
ivy.xml
|
|
@ -12,6 +12,9 @@
|
|||
<dependency org="net.sf" name="picard" rev="latest.integration"/>
|
||||
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
|
||||
|
||||
<!-- Tribble -->
|
||||
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
|
||||
|
||||
<dependency org="log4j" name="log4j" rev="1.2.15">
|
||||
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
|
||||
<exclude org="javax.mail" />
|
||||
|
|
@ -30,6 +33,9 @@
|
|||
|
||||
<!-- Dependencies for the graph aligner -->
|
||||
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
|
||||
|
||||
<!-- Dependencies for the html walker documention -->
|
||||
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
|
||||
|
||||
<!-- Commons Dependencies -->
|
||||
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,169 @@
|
|||
library(gsalib)
|
||||
require("ggplot2")
|
||||
require("gplots")
|
||||
|
||||
#
|
||||
# Standard command line switch. Can we loaded interactively for development
|
||||
# or executed with RScript
|
||||
#
|
||||
args = commandArgs(TRUE)
|
||||
onCMDLine = ! is.na(args[1])
|
||||
if ( onCMDLine ) {
|
||||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
}
|
||||
|
||||
RUNTIME_UNITS = "(sec)"
|
||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
||||
|
||||
#
|
||||
# Helper function to aggregate all of the jobs in the report across all tables
|
||||
#
|
||||
allJobsFromReport <- function(report) {
|
||||
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
|
||||
sub <- lapply(report, function(table) table[,names])
|
||||
do.call("rbind", sub)
|
||||
}
|
||||
|
||||
#
|
||||
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||
#
|
||||
plotJobsGantt <- function(gatkReport, sortOverall) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
if ( sortOverall ) {
|
||||
title = "All jobs, by analysis, by start time"
|
||||
allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
|
||||
} else {
|
||||
title = "All jobs, sorted by start time"
|
||||
allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
|
||||
}
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
||||
maxRelTime = max(allJobs$relDoneTime)
|
||||
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + xlim(0, maxRelTime * 1.1)
|
||||
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job")
|
||||
p <- p + opts(title=title)
|
||||
print(p)
|
||||
}
|
||||
|
||||
#
|
||||
# Plots scheduling efficiency at job events
|
||||
#
|
||||
plotProgressByTime <- function(gatkReport) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
nJobs = dim(allJobs)[1]
|
||||
allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
|
||||
times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
|
||||
|
||||
countJobs <- function(p) {
|
||||
s = allJobs$relStartTime
|
||||
e = allJobs$relDoneTime
|
||||
x = c() # I wish I knew how to make this work with apply
|
||||
for ( time in times )
|
||||
x = c(x, sum(p(s, e, time)))
|
||||
x
|
||||
}
|
||||
|
||||
pending = countJobs(function(s, e, t) s > t)
|
||||
done = countJobs(function(s, e, t) e < t)
|
||||
running = nJobs - pending - done
|
||||
|
||||
d = data.frame(times=times, pending=pending, running=running, done=done)
|
||||
|
||||
p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
|
||||
p <- p + facet_grid(variable ~ ., scales="free")
|
||||
p <- p + geom_line(size=2)
|
||||
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
|
||||
p <- p + opts(title = "Job scheduling")
|
||||
print(p)
|
||||
}
|
||||
|
||||
#
|
||||
# Creates tables for each job in this group
|
||||
#
|
||||
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
|
||||
plotGroup <- function(groupTable) {
|
||||
name = unique(groupTable$analysisName)[1]
|
||||
groupAnnotations = setdiff(names(groupTable), standardColumns)
|
||||
sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
|
||||
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
|
||||
|
||||
# create a table showing each job and all annotations
|
||||
textplot(sub, show.rownames=F)
|
||||
title(paste("Job summary for", name, "full itemization"), cex=3)
|
||||
|
||||
# create the table for each combination of values in the group, listing iterations in the columns
|
||||
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
|
||||
|
||||
# histogram of job times by groupAnnotations
|
||||
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
|
||||
# todo -- how do we group by annotations?
|
||||
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
|
||||
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
|
||||
p <- p + opts(title=paste("Job runtime histogram for", name))
|
||||
print(p)
|
||||
}
|
||||
|
||||
# as above, but averaging over all iterations
|
||||
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
|
||||
if ( dim(sub)[1] > 1 ) {
|
||||
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
|
||||
textplot(as.data.frame(sum), show.rownames=F)
|
||||
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
|
||||
}
|
||||
}
|
||||
|
||||
# print out some useful basic information
|
||||
print("Report")
|
||||
print(paste("Project :", inputFileName))
|
||||
|
||||
convertUnits <- function(gatkReportData) {
|
||||
convertGroup <- function(g) {
|
||||
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g
|
||||
}
|
||||
lapply(gatkReportData, convertGroup)
|
||||
}
|
||||
|
||||
|
||||
# read the table
|
||||
gatkReportData <- gsa.read.gatkreport(inputFileName)
|
||||
gatkReportData <- convertUnits(gatkReportData)
|
||||
#print(summary(gatkReportData))
|
||||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
pdf(outputPDF, height=8.5, width=11)
|
||||
}
|
||||
|
||||
plotJobsGantt(gatkReportData, T)
|
||||
plotJobsGantt(gatkReportData, F)
|
||||
plotProgressByTime(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
plotGroup(group)
|
||||
}
|
||||
|
||||
if ( ! is.na(outputPDF) ) {
|
||||
dev.off()
|
||||
}
|
||||
|
|
@ -20,6 +20,20 @@
|
|||
assign(tableName, d, envir=tableEnv);
|
||||
}
|
||||
|
||||
# Read a fixed width line of text into a list.
|
||||
.gsa.splitFixedWidth <- function(line, columnStarts) {
|
||||
splitStartStop <- function(x) {
|
||||
x = substring(x, starts, stops);
|
||||
x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
|
||||
x;
|
||||
}
|
||||
|
||||
starts = c(1, columnStarts);
|
||||
stops = c(columnStarts - 1, nchar(line));
|
||||
|
||||
sapply(line, splitStartStop)[,1];
|
||||
}
|
||||
|
||||
# Load all GATKReport tables from a file
|
||||
gsa.read.gatkreport <- function(filename) {
|
||||
con = file(filename, "r", blocking = TRUE);
|
||||
|
|
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
|
|||
tableName = NA;
|
||||
tableHeader = c();
|
||||
tableRows = c();
|
||||
version = NA;
|
||||
|
||||
for (line in lines) {
|
||||
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
|
||||
headerFields = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
if (!is.na(tableName)) {
|
||||
|
|
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
|
|||
tableName = headerFields[2];
|
||||
tableHeader = c();
|
||||
tableRows = c();
|
||||
|
||||
# For differences in versions see
|
||||
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
|
||||
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
version = "v0.1";
|
||||
|
||||
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
|
||||
version = "v0.2";
|
||||
columnStarts = c();
|
||||
|
||||
}
|
||||
|
||||
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
|
||||
# do nothing
|
||||
} else if (!is.na(tableName)) {
|
||||
row = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
if (version == "v0.1") {
|
||||
row = unlist(strsplit(line, "[[:space:]]+"));
|
||||
|
||||
} else if (version == "v0.2") {
|
||||
if (length(tableHeader) == 0) {
|
||||
headerChars = unlist(strsplit(line, ""));
|
||||
# Find the first position of non space characters, excluding the first character
|
||||
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
|
||||
}
|
||||
|
||||
row = .gsa.splitFixedWidth(line, columnStarts);
|
||||
}
|
||||
|
||||
if (length(tableHeader) == 0) {
|
||||
tableHeader = row;
|
||||
tableHeader = row;
|
||||
} else {
|
||||
tableRows = rbind(tableRows, row);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package net.sf.picard.reference;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
|
||||
|
|
@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
* Produces fai file with same output as samtools faidx
|
||||
*/
|
||||
public class FastaSequenceIndexBuilder {
|
||||
public File fastaFile;
|
||||
ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
|
||||
final public File fastaFile;
|
||||
final boolean printProgress;
|
||||
|
||||
// keep track of location in file
|
||||
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
|
||||
|
|
@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
|
|||
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
|
||||
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
|
||||
|
||||
public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
|
||||
this.progress = progress;
|
||||
public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
|
||||
this.fastaFile = fastaFile;
|
||||
fileLength = fastaFile.length();
|
||||
this.printProgress = printProgress;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
|
|||
|
||||
if (System.currentTimeMillis() - lastTimestamp > 10000) {
|
||||
int percentProgress = (int) (100*bytesRead/fileLength);
|
||||
if (progress != null)
|
||||
progress.percentProgress(percentProgress);
|
||||
if (printProgress)
|
||||
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
|
||||
lastTimestamp = System.currentTimeMillis();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,30 +31,85 @@ import org.broadinstitute.sting.commandline.Input;
|
|||
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: rpoplin
|
||||
* Date: Dec 1, 2009
|
||||
* Call R scripts to plot residual error versus the various covariates.
|
||||
*
|
||||
* <p>
|
||||
* After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
|
||||
* reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
|
||||
* show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
|
||||
* In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run
|
||||
* CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
|
||||
* by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual
|
||||
* error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
|
||||
* bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
|
||||
*
|
||||
* <p>
|
||||
* The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
|
||||
* each of the quality score estimates. It is defined as follows for N, the number of observations:
|
||||
*
|
||||
* <ul>
|
||||
* <li>light blue means N < 1,000</li>
|
||||
* <li>cornflower blue means 1,000 <= N < 10,000</li>
|
||||
* <li>dark blue means N >= 10,000</li>
|
||||
* <li>The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
|
||||
* meaningless and so aren't included in any of the numerical calculations.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
|
||||
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
|
||||
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
|
||||
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
|
||||
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
|
||||
*
|
||||
* <p>
|
||||
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
|
||||
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx4g -jar AnalyzeCovariates.jar \
|
||||
* -recalFile /path/to/recal.table.csv \
|
||||
* -outputDir /path/to/output_dir/ \
|
||||
* -resources resources/ \
|
||||
* -ignoreQ 5
|
||||
* </pre>
|
||||
*
|
||||
* Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
|
||||
*/
|
||||
|
||||
@DocumentedGATKFeature(
|
||||
groupName = "AnalyzeCovariates",
|
||||
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
|
||||
public class AnalyzeCovariates extends CommandLineProgram {
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
/////////////////////////////
|
||||
|
||||
/**
|
||||
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
|
||||
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
|
||||
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
|
||||
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
|
||||
*/
|
||||
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
|
||||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
|
|
@ -65,13 +120,22 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
|
||||
private int IGNORE_QSCORES_LESS_THAN = 5;
|
||||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
|
||||
|
||||
/**
|
||||
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
|
||||
* by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later.
|
||||
*/
|
||||
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
|
||||
private int MAX_QUALITY_SCORE = 50;
|
||||
|
||||
/**
|
||||
* This argument is useful for comparing before/after plots and you want the axes to match each other.
|
||||
*/
|
||||
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
||||
private int MAX_HISTOGRAM_VALUE = 0;
|
||||
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots")
|
||||
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
|
||||
private boolean DO_INDEL_QUALITY = false;
|
||||
|
||||
|
||||
|
|
@ -261,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
}
|
||||
|
||||
private void callRScripts() {
|
||||
RScriptExecutor.RScriptArgumentCollection argumentCollection =
|
||||
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
|
||||
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
|
||||
|
||||
int numReadGroups = 0;
|
||||
|
||||
|
||||
// for each read group
|
||||
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
|
||||
|
||||
Process p;
|
||||
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
|
||||
|
||||
String readGroup = readGroupKey.toString();
|
||||
|
|
@ -276,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
// for each covariate
|
||||
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
|
||||
Covariate cov = requestedCovariates.get(iii);
|
||||
try {
|
||||
|
||||
if (DO_INDEL_QUALITY) {
|
||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " +
|
||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
p.waitFor();
|
||||
|
||||
} else {
|
||||
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
|
||||
if (DO_INDEL_QUALITY) {
|
||||
executor.callRScripts("plot_indelQuality.R", outputFilename,
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
} else {
|
||||
if( iii == 1 ) {
|
||||
// Analyze reported quality
|
||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
|
||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
||||
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
p.waitFor();
|
||||
} else { // Analyze all other covariates
|
||||
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
|
||||
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
p.waitFor();
|
||||
}
|
||||
// Analyze reported quality
|
||||
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
|
||||
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
} else { // Analyze all other covariates
|
||||
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
} catch (IOException e) {
|
||||
System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option.");
|
||||
e.printStackTrace();
|
||||
System.exit(-1);
|
||||
}
|
||||
}
|
||||
} else { // at the maximum number of read groups so break out
|
||||
|
|
|
|||
|
|
@ -0,0 +1,4 @@
|
|||
/**
|
||||
* Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
|
||||
*/
|
||||
package org.broadinstitute.sting.analyzecovariates;
|
||||
|
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
/**
|
||||
* Indicates that a walker argument should is considered an advanced option.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @version 0.1
|
||||
*/
|
||||
@Documented
|
||||
@Inherited
|
||||
@Retention(RetentionPolicy.RUNTIME)
|
||||
@Target({ElementType.TYPE,ElementType.FIELD})
|
||||
public @interface Advanced {
|
||||
}
|
||||
|
|
@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
|
|||
|
||||
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
|
||||
public boolean matches( ArgumentDefinition definition, Object key ) {
|
||||
return definition.validation != null;
|
||||
// We can perform some sort of validation for anything that isn't a flag.
|
||||
return !definition.isFlag;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
public final String label;
|
||||
|
||||
/**
|
||||
* Maps indicies of command line arguments to values paired with that argument.
|
||||
* Maps indices of command line arguments to values paired with that argument.
|
||||
*/
|
||||
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
||||
|
||||
|
|
|
|||
|
|
@ -151,6 +151,14 @@ public class ArgumentSource {
|
|||
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is the given argument considered an advanced option when displaying on the command-line argument system.
|
||||
* @return True if so. False otherwise.
|
||||
*/
|
||||
public boolean isAdvanced() {
|
||||
return field.isAnnotationPresent(Advanced.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Is this command-line argument dependent on some primitive argument types?
|
||||
* @return True if this command-line argument depends on other arguments; false otherwise.
|
||||
|
|
@ -175,13 +183,17 @@ public class ArgumentSource {
|
|||
return typeDescriptor.createsTypeDefault(this);
|
||||
}
|
||||
|
||||
public String typeDefaultDocString() {
|
||||
return typeDescriptor.typeDefaultDocString(this);
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a default for the given type.
|
||||
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
||||
* @return A default value for the given type.
|
||||
*/
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine) {
|
||||
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType());
|
||||
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.walkers.Multiplex;
|
||||
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
|
@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
|
|||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.*;
|
||||
import java.util.*;
|
||||
|
|
@ -80,14 +83,26 @@ public abstract class ArgumentTypeDescriptor {
|
|||
*/
|
||||
public boolean createsTypeDefault(ArgumentSource source) { return false; }
|
||||
|
||||
/**
|
||||
* Returns a documentation-friendly value for the default of a type descriptor.
|
||||
* Must be overridden if createsTypeDefault return true. cannot be called otherwise
|
||||
* @param source Source of the command-line argument.
|
||||
* @return Friendly string of the default value, for documentation. If doesn't create a default, throws
|
||||
* and UnsupportedOperationException
|
||||
*/
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
/**
|
||||
* Generates a default for the given type.
|
||||
*
|
||||
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
|
||||
* @param source Source of the command-line argument.
|
||||
* @param type Type of value to create, in case the command-line argument system wants influence.
|
||||
* @return A default value for the given type.
|
||||
*/
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
|
||||
|
||||
/**
|
||||
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
|
||||
|
|
@ -109,7 +124,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @return The parsed object.
|
||||
*/
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
|
||||
return parse(parsingEngine, source, source.field.getType(), matches);
|
||||
return parse(parsingEngine, source, source.field.getGenericType(), matches);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -131,18 +146,18 @@ public abstract class ArgumentTypeDescriptor {
|
|||
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
|
||||
Annotation argumentAnnotation = getArgumentAnnotation(source);
|
||||
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
|
||||
source.field.getType(),
|
||||
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
||||
ArgumentDefinition.getShortName(argumentAnnotation),
|
||||
ArgumentDefinition.getDoc(argumentAnnotation),
|
||||
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
||||
source.isFlag(),
|
||||
source.isMultiValued(),
|
||||
source.isHidden(),
|
||||
getCollectionComponentType(source.field),
|
||||
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
||||
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
||||
getValidOptions(source) );
|
||||
source.field.getType(),
|
||||
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
|
||||
ArgumentDefinition.getShortName(argumentAnnotation),
|
||||
ArgumentDefinition.getDoc(argumentAnnotation),
|
||||
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
|
||||
source.isFlag(),
|
||||
source.isMultiValued(),
|
||||
source.isHidden(),
|
||||
makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
|
||||
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
|
||||
ArgumentDefinition.getValidationRegex(argumentAnnotation),
|
||||
getValidOptions(source) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -151,7 +166,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @return The parameterized component type, or String.class if the parameterized type could not be found.
|
||||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -162,7 +177,7 @@ public abstract class ArgumentTypeDescriptor {
|
|||
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
|
||||
* @return The individual parsed object matching the argument match with Class type.
|
||||
*/
|
||||
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
|
||||
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
|
||||
|
||||
/**
|
||||
* If the argument source only accepts a small set of options, populate the returned list with
|
||||
|
|
@ -273,6 +288,123 @@ public abstract class ArgumentTypeDescriptor {
|
|||
public static boolean isArgumentHidden(Field field) {
|
||||
return field.isAnnotationPresent(Hidden.class);
|
||||
}
|
||||
|
||||
public Class makeRawTypeIfNecessary(Type t) {
|
||||
if ( t == null )
|
||||
return null;
|
||||
else if ( t instanceof ParameterizedType )
|
||||
return (Class)((ParameterizedType) t).getRawType();
|
||||
else if ( t instanceof Class ) {
|
||||
return (Class)t;
|
||||
} else {
|
||||
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBinding objects
|
||||
*/
|
||||
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* We only want RodBinding class objects
|
||||
* @param type The type to check.
|
||||
* @return true if the provided class is a RodBinding.class
|
||||
*/
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return isRodBinding(type);
|
||||
}
|
||||
|
||||
public static boolean isRodBinding( Class type ) {
|
||||
return RodBinding.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
|
||||
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
return "none";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
|
||||
try {
|
||||
String name = defaultDefinition.fullName;
|
||||
String tribbleType = null;
|
||||
Tags tags = getArgumentTags(matches);
|
||||
// must have one or two tag values here
|
||||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
||||
value, source.field.getName()));
|
||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
name = tags.getPositionalTags().get(0);
|
||||
tribbleType = tags.getPositionalTags().get(1);
|
||||
} else {
|
||||
// case with 0 or 1 positional tags
|
||||
FeatureManager manager = new FeatureManager();
|
||||
|
||||
// -X:type style is a type when we cannot determine the type dynamically
|
||||
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
|
||||
if ( tag1 != null ) {
|
||||
if ( manager.getByName(tag1) != null ) // this a type
|
||||
tribbleType = tag1;
|
||||
else
|
||||
name = tag1;
|
||||
}
|
||||
|
||||
if ( tribbleType == null ) {
|
||||
// try to determine the file type dynamically
|
||||
File file = new File(value);
|
||||
if ( file.canRead() && file.isFile() ) {
|
||||
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
|
||||
if ( featureDescriptor != null ) {
|
||||
tribbleType = featureDescriptor.getName();
|
||||
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
|
||||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null )
|
||||
if ( ! file.canRead() | ! file.isFile() ) {
|
||||
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
|
||||
} else {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
|
||||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(result,tags);
|
||||
parsingEngine.addRodBinding(result);
|
||||
return result;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s.",
|
||||
value, source.field.getName()));
|
||||
} catch (Exception e) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Failed to parse value %s for argument %s. Message: %s",
|
||||
value, source.field.getName(), e.getMessage()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -282,9 +414,10 @@ public abstract class ArgumentTypeDescriptor {
|
|||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
if( type.isPrimitive() ) return true;
|
||||
if( type.isEnum() ) return true;
|
||||
if( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
||||
if ( type.isPrimitive() ) return true;
|
||||
if ( type.isEnum() ) return true;
|
||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
|
||||
try {
|
||||
type.getConstructor(String.class);
|
||||
|
|
@ -298,7 +431,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||
Class type = makeRawTypeIfNecessary(fulltype);
|
||||
if (source.isFlag())
|
||||
return true;
|
||||
|
||||
|
|
@ -339,7 +473,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
throw e;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
|
||||
value, source.field.getName()));
|
||||
value, source.field.getName()));
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(String.class, e);
|
||||
}
|
||||
|
|
@ -351,7 +485,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
|
||||
|
|
@ -382,10 +516,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
Class componentType;
|
||||
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
|
||||
Class type = makeRawTypeIfNecessary(fulltype);
|
||||
Type componentType;
|
||||
Object result;
|
||||
Tags tags;
|
||||
|
||||
if( Collection.class.isAssignableFrom(type) ) {
|
||||
|
||||
|
|
@ -399,7 +533,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
componentType = getCollectionComponentType( source.field );
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||
|
||||
Collection collection;
|
||||
try {
|
||||
|
|
@ -428,7 +562,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
else if( type.isArray() ) {
|
||||
componentType = type.getComponentType();
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
|
||||
|
||||
// Assemble a collection of individual values used in this computation.
|
||||
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
|
||||
|
|
@ -436,7 +570,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
for( ArgumentMatch value: match )
|
||||
values.add(value);
|
||||
|
||||
result = Array.newInstance(componentType,values.size());
|
||||
result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
|
||||
|
||||
int i = 0;
|
||||
for( ArgumentMatch value: values ) {
|
||||
|
|
@ -459,16 +593,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
@Override
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||
return (Class)parameterizedType.getActualTypeArguments()[0];
|
||||
}
|
||||
else
|
||||
return String.class;
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
if( parameterizedType.getActualTypeArguments().length > 1 )
|
||||
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
|
||||
return parameterizedType.getActualTypeArguments()[0];
|
||||
}
|
||||
else
|
||||
return String.class;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -510,12 +644,12 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||
if(multiplexer == null || multiplexedIds == null)
|
||||
throw new ReviewedStingException("No multiplexed ids available");
|
||||
|
||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||
Class componentType = getCollectionComponentType(source.field);
|
||||
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
|
||||
|
||||
for(Object id: multiplexedIds) {
|
||||
|
|
@ -527,15 +661,19 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
return multiplexedMapping;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
return "None";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
if(multiplexedIds == null)
|
||||
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
|
||||
|
||||
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
|
||||
|
||||
Class componentType = getCollectionComponentType(source.field);
|
||||
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
|
||||
|
||||
|
||||
for(Object id: multiplexedIds) {
|
||||
|
|
@ -606,7 +744,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
|
||||
*/
|
||||
@Override
|
||||
protected Class getCollectionComponentType( Field field ) {
|
||||
protected Type getCollectionComponentType( Field field ) {
|
||||
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
|
||||
if( field.getGenericType() instanceof ParameterizedType) {
|
||||
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import java.util.Locale;
|
|||
public abstract class CommandLineProgram {
|
||||
|
||||
/** The command-line program and the arguments it returned. */
|
||||
protected ParsingEngine parser = null;
|
||||
public ParsingEngine parser = null;
|
||||
|
||||
/** the default log level */
|
||||
@Argument(fullName = "logging_level",
|
||||
|
|
@ -144,6 +144,11 @@ public abstract class CommandLineProgram {
|
|||
|
||||
public static int result = -1;
|
||||
|
||||
@SuppressWarnings("unchecked")
|
||||
public static void start(CommandLineProgram clp, String[] args) throws Exception {
|
||||
start(clp, args, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* This function is called to start processing the command line, and kick
|
||||
* off the execute message of the program.
|
||||
|
|
@ -153,7 +158,7 @@ public abstract class CommandLineProgram {
|
|||
* @throws Exception when an exception occurs
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
public static void start(CommandLineProgram clp, String[] args) throws Exception {
|
||||
public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception {
|
||||
|
||||
try {
|
||||
// setup our log layout
|
||||
|
|
@ -180,8 +185,9 @@ public abstract class CommandLineProgram {
|
|||
// - InvalidArgument in case these arguments are specified by plugins.
|
||||
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've
|
||||
// determined the full complement of arguments.
|
||||
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
|
||||
ParsingEngine.ValidationType.InvalidArgument));
|
||||
if ( ! dryRun )
|
||||
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
|
||||
ParsingEngine.ValidationType.InvalidArgument));
|
||||
parser.loadArgumentsIntoObject(clp);
|
||||
|
||||
// Initialize the logger using the loaded command line.
|
||||
|
|
@ -195,36 +201,40 @@ public abstract class CommandLineProgram {
|
|||
if (isHelpPresent(parser))
|
||||
printHelpAndExit(clp, parser);
|
||||
|
||||
parser.validate();
|
||||
if ( ! dryRun ) parser.validate();
|
||||
} else {
|
||||
parser.parse(args);
|
||||
|
||||
if (isHelpPresent(parser))
|
||||
printHelpAndExit(clp, parser);
|
||||
if ( ! dryRun ) {
|
||||
if (isHelpPresent(parser))
|
||||
printHelpAndExit(clp, parser);
|
||||
|
||||
parser.validate();
|
||||
parser.validate();
|
||||
}
|
||||
parser.loadArgumentsIntoObject(clp);
|
||||
|
||||
// Initialize the logger using the loaded command line.
|
||||
clp.setupLoggerLevel(layout);
|
||||
}
|
||||
|
||||
// if they specify a log location, output our data there
|
||||
if (clp.toFile != null) {
|
||||
FileAppender appender;
|
||||
try {
|
||||
appender = new FileAppender(layout, clp.toFile, false);
|
||||
logger.addAppender(appender);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
|
||||
if ( ! dryRun ) {
|
||||
// if they specify a log location, output our data there
|
||||
if (clp.toFile != null) {
|
||||
FileAppender appender;
|
||||
try {
|
||||
appender = new FileAppender(layout, clp.toFile, false);
|
||||
logger.addAppender(appender);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
|
||||
}
|
||||
}
|
||||
|
||||
// regardless of what happens next, generate the header information
|
||||
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
|
||||
|
||||
// call the execute
|
||||
CommandLineProgram.result = clp.execute();
|
||||
}
|
||||
|
||||
// regardless of what happens next, generate the header information
|
||||
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
|
||||
|
||||
// call the execute
|
||||
CommandLineProgram.result = clp.execute();
|
||||
}
|
||||
catch (ArgumentException e) {
|
||||
clp.parser.printHelp(clp.getApplicationDetails());
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public @interface Output {
|
|||
* --help argument is specified.
|
||||
* @return Doc string associated with this command-line argument.
|
||||
*/
|
||||
String doc() default "An output file presented to the walker. Will overwrite contents if file exists.";
|
||||
String doc() default "An output file created by the walker. Will overwrite contents if file exists";
|
||||
|
||||
/**
|
||||
* Is this argument required. If true, the command-line argument system will
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
|
@ -41,11 +42,16 @@ import java.util.*;
|
|||
* A parser for Sting command-line arguments.
|
||||
*/
|
||||
public class ParsingEngine {
|
||||
/**
|
||||
* The loaded argument sources along with their back definitions.
|
||||
*/
|
||||
private Map<ArgumentDefinition,ArgumentSource> argumentSourcesByDefinition = new HashMap<ArgumentDefinition,ArgumentSource>();
|
||||
|
||||
/**
|
||||
* A list of defined arguments against which command lines are matched.
|
||||
* Package protected for testing access.
|
||||
*/
|
||||
ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
|
||||
public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
|
||||
|
||||
/**
|
||||
* A list of matches from defined arguments to command-line text.
|
||||
|
|
@ -59,11 +65,17 @@ public class ParsingEngine {
|
|||
*/
|
||||
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
|
||||
|
||||
/**
|
||||
* All of the RodBinding objects we've seen while parsing
|
||||
*/
|
||||
private List<RodBinding> rodBindings = new ArrayList<RodBinding>();
|
||||
|
||||
/**
|
||||
* Class reference to the different types of descriptors that the create method can create.
|
||||
* The type of set used must be ordered (but not necessarily sorted).
|
||||
*/
|
||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||
new RodBindingArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor(),
|
||||
new MultiplexArgumentTypeDescriptor()) );
|
||||
|
||||
|
|
@ -80,6 +92,7 @@ public class ParsingEngine {
|
|||
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
|
||||
|
||||
public ParsingEngine( CommandLineProgram clp ) {
|
||||
RodBinding.resetNameCounter();
|
||||
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
|
||||
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
|
||||
|
||||
|
|
@ -107,8 +120,13 @@ public class ParsingEngine {
|
|||
*/
|
||||
public void addArgumentSource( String sourceName, Class sourceClass ) {
|
||||
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
|
||||
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
|
||||
argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
|
||||
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
|
||||
List<ArgumentDefinition> argumentDefinitions = argumentSource.createArgumentDefinitions();
|
||||
for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
|
||||
argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
|
||||
argumentsFromSource.add( argumentDefinition );
|
||||
}
|
||||
}
|
||||
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
|
||||
}
|
||||
|
||||
|
|
@ -199,16 +217,25 @@ public class ParsingEngine {
|
|||
throw new InvalidArgumentException( invalidArguments );
|
||||
}
|
||||
|
||||
// Find invalid argument values (arguments that fail the regexp test.
|
||||
// Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
|
||||
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
|
||||
Collection<ArgumentDefinition> verifiableArguments =
|
||||
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
|
||||
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
|
||||
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
|
||||
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
|
||||
// Check to see whether an argument value was specified. Argument values must be provided
|
||||
// when the argument name is specified and the argument is not a flag type.
|
||||
for(ArgumentMatch verifiableMatch: verifiableMatches) {
|
||||
ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
|
||||
if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
|
||||
invalidValues.add(new Pair<ArgumentDefinition,String>(verifiableArgument,null));
|
||||
}
|
||||
|
||||
// Ensure that the field contents meet the validation criteria specified by the regular expression.
|
||||
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
|
||||
for( String value: verifiableMatch.values() ) {
|
||||
if( !value.matches(verifiableArgument.validation) )
|
||||
if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
|
||||
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
|
||||
}
|
||||
}
|
||||
|
|
@ -304,7 +331,17 @@ public class ParsingEngine {
|
|||
if(!tags.containsKey(key))
|
||||
return new Tags();
|
||||
return tags.get(key);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a RodBinding type argument to this parser. Called during parsing to allow
|
||||
* us to track all of the RodBindings discovered in the command line.
|
||||
* @param rodBinding the rodbinding to add. Must not be added twice
|
||||
*/
|
||||
@Requires("rodBinding != null")
|
||||
public void addRodBinding(final RodBinding rodBinding) {
|
||||
rodBindings.add(rodBinding);
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify the user that a deprecated command-line argument has been used.
|
||||
|
|
@ -327,7 +364,7 @@ public class ParsingEngine {
|
|||
*/
|
||||
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
|
||||
// Nothing to load
|
||||
if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired()))
|
||||
if( argumentMatches.size() == 0 && ! source.createsTypeDefault() )
|
||||
return;
|
||||
|
||||
// Target instance into which to inject the value.
|
||||
|
|
@ -344,6 +381,10 @@ public class ParsingEngine {
|
|||
}
|
||||
}
|
||||
|
||||
public Collection<RodBinding> getRodBindings() {
|
||||
return Collections.unmodifiableCollection(rodBindings);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a collection of the container instances of the given type stored within the given target.
|
||||
* @param source Argument source.
|
||||
|
|
@ -390,7 +431,6 @@ public class ParsingEngine {
|
|||
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
|
||||
}
|
||||
|
||||
|
||||
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
|
||||
// now simply call into the truly general routine extract argument bindings but with a null
|
||||
// object so bindings aren't computed
|
||||
|
|
@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
|
|||
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
|
||||
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
||||
invalidValue.first.fullName,
|
||||
invalidValue.second,
|
||||
invalidValue.first.validation) );
|
||||
if(invalidValue.getSecond() == null)
|
||||
sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
|
||||
invalidValue.first.fullName) );
|
||||
else
|
||||
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
|
||||
invalidValue.first.fullName,
|
||||
invalidValue.second,
|
||||
invalidValue.first.validation) );
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,187 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A RodBinding representing a walker argument that gets bound to a ROD track.
|
||||
*
|
||||
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
|
||||
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
|
||||
* is explicitly typed with type of the Tribble.Feature expected to be produced by this
|
||||
* argument. The GATK Engine takes care of initializing the binding and connecting it
|
||||
* to the RMD system.
|
||||
*
|
||||
* It is recommended that optional RodBindings be initialized to the value returned
|
||||
* by the static method makeUnbound().
|
||||
*
|
||||
* Note that this class is immutable.
|
||||
*/
|
||||
public final class RodBinding<T extends Feature> {
|
||||
protected final static String UNBOUND_VARIABLE_NAME = "";
|
||||
protected final static String UNBOUND_SOURCE = "UNBOUND";
|
||||
protected final static String UNBOUND_TRIBBLE_TYPE = "";
|
||||
|
||||
/**
|
||||
* Create an unbound Rodbinding of type. This is the correct programming
|
||||
* style for an optional RodBinding<T>
|
||||
*
|
||||
* At Input()
|
||||
* RodBinding<T> x = RodBinding.makeUnbound(T.class)
|
||||
*
|
||||
* The unbound binding is guaranteed to never match any binding. It uniquely
|
||||
* returns false to isBound().
|
||||
*
|
||||
* @param type the Class type produced by this unbound object
|
||||
* @param <T> any class extending Tribble Feature
|
||||
* @return the UNBOUND RodBinding producing objects of type T
|
||||
*/
|
||||
@Requires("type != null")
|
||||
protected final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
|
||||
return new RodBinding<T>(type);
|
||||
}
|
||||
|
||||
/** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */
|
||||
final private String name;
|
||||
/** where the data for this ROD is coming from. A file or special value if coming from stdin */
|
||||
final private String source;
|
||||
/** the string name of the tribble type, such as vcf, bed, etc. */
|
||||
final private String tribbleType;
|
||||
/** The command line tags associated with this RodBinding */
|
||||
final private Tags tags;
|
||||
/** The Java class expected for this RodBinding. Must correspond to the type emited by Tribble */
|
||||
final private Class<T> type;
|
||||
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
|
||||
final private boolean bound;
|
||||
|
||||
/**
|
||||
* The name counter. This is how we create unique names for collections of RodBindings
|
||||
* on the command line. If you have provide the GATK with -X file1 and -X file2 to a
|
||||
* RodBinding argument as List<RodBinding<T>> then each binding will receive automatically
|
||||
* the name of X and X2.
|
||||
*/
|
||||
final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
|
||||
|
||||
/** for UnitTests */
|
||||
final public static void resetNameCounter() {
|
||||
nameCounter.clear();
|
||||
}
|
||||
|
||||
@Requires("rawName != null")
|
||||
@Ensures("result != null")
|
||||
final private static synchronized String countedVariableName(final String rawName) {
|
||||
Integer count = nameCounter.get(rawName);
|
||||
if ( count == null ) {
|
||||
nameCounter.put(rawName, 1);
|
||||
return rawName;
|
||||
} else {
|
||||
nameCounter.put(rawName, count + 1);
|
||||
return rawName + (count + 1);
|
||||
}
|
||||
}
|
||||
|
||||
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||
public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||
this.type = type;
|
||||
this.name = countedVariableName(rawName);
|
||||
this.source = source;
|
||||
this.tribbleType = tribbleType;
|
||||
this.tags = tags;
|
||||
this.bound = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
|
||||
* @param type class this unbound RodBinding creates
|
||||
*/
|
||||
@Requires({"type != null"})
|
||||
private RodBinding(Class<T> type) {
|
||||
this.type = type;
|
||||
this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker
|
||||
this.source = UNBOUND_SOURCE;
|
||||
this.tribbleType = UNBOUND_TRIBBLE_TYPE;
|
||||
this.tags = new Tags();
|
||||
this.bound = false;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
|
||||
*/
|
||||
final public boolean isBound() {
|
||||
return bound;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getName() {
|
||||
return name;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the string name of the tribble type, such as vcf, bed, etc.
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public Class<T> getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return where the data for this ROD is coming from. A file or special value if coming from stdin
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The command line tags associated with this RodBinding. Will include the tags used to
|
||||
* determine the name and type of this RodBinding
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public Tags getTags() {
|
||||
return tags;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return The Java class expected for this RodBinding. Must correspond to the type emited by Tribble
|
||||
*/
|
||||
@Ensures({"result != null"})
|
||||
final public String getTribbleType() {
|
||||
return tribbleType;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
|
||||
}
|
||||
}
|
||||
|
|
@ -25,21 +25,20 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk;
|
||||
|
||||
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
import org.broadinstitute.sting.utils.text.ListFileUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
|
|
@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
*/
|
||||
private final Collection<Object> argumentSources = new ArrayList<Object>();
|
||||
|
||||
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
|
||||
|
||||
/**
|
||||
* this is the function that the inheriting class can expect to have called
|
||||
* when the command line system has initialized.
|
||||
|
|
@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
|
||||
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
|
||||
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
|
||||
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
|
||||
|
||||
engine.setWalker(walker);
|
||||
walker.setToolkit(engine);
|
||||
|
|
@ -96,6 +96,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
loadArgumentsIntoObject(walker);
|
||||
argumentSources.add(walker);
|
||||
|
||||
Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
|
||||
|
||||
// todo: remove me when the old style system is removed
|
||||
if ( getArgumentCollection().RODBindings.size() > 0 ) {
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2.");
|
||||
logger.warn("Please use arguments defined by each specific walker instead.");
|
||||
for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
|
||||
logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
|
||||
}
|
||||
logger.warn("################################################################################");
|
||||
logger.warn("################################################################################");
|
||||
System.exit(1);
|
||||
}
|
||||
|
||||
engine.setReferenceMetaDataFiles(rodBindings);
|
||||
|
||||
for (ReadFilter filter: filters) {
|
||||
loadArgumentsIntoObject(filter);
|
||||
argumentSources.add(filter);
|
||||
|
|
@ -112,6 +130,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
|
||||
* This report will be written to either STDOUT or to the run repository, depending on the options
|
||||
|
|
@ -142,7 +161,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
|
|||
*/
|
||||
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
|
||||
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
|
||||
new SAMFileReaderArgumentTypeDescriptor(engine),
|
||||
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
|
||||
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.walkers.Attribution;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
import org.broadinstitute.sting.utils.help.*;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
* @date May 8, 2009
|
||||
* <p/>
|
||||
* Class CommandLineGATK
|
||||
* <p/>
|
||||
* The GATK engine itself. Manages map/reduce data access and runs walkers.
|
||||
*
|
||||
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
|
||||
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
|
||||
* the gatk engine should deal with any data related information.
|
||||
*/
|
||||
@DocumentedGATKFeature(
|
||||
groupName = "GATK Engine",
|
||||
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
|
||||
extraDocs = { UserException.class })
|
||||
public class CommandLineGATK extends CommandLineExecutable {
|
||||
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
|
||||
private String analysisName = null;
|
||||
|
|
@ -173,12 +175,12 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
StringBuilder additionalHelp = new StringBuilder();
|
||||
Formatter formatter = new Formatter(additionalHelp);
|
||||
|
||||
formatter.format("Description:%n");
|
||||
formatter.format("Available Reference Ordered Data types:%n");
|
||||
formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
|
||||
formatter.format("%n");
|
||||
|
||||
WalkerManager walkerManager = engine.getWalkerManager();
|
||||
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
|
||||
|
||||
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
|
||||
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
|
||||
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
|
||||
|
||||
return additionalHelp.toString();
|
||||
}
|
||||
|
|
@ -192,8 +194,6 @@ public class CommandLineGATK extends CommandLineExecutable {
|
|||
StringBuilder additionalHelp = new StringBuilder();
|
||||
Formatter formatter = new Formatter(additionalHelp);
|
||||
|
||||
formatter.format("Available analyses:%n");
|
||||
|
||||
// Get the list of walker names from the walker manager.
|
||||
WalkerManager walkerManager = engine.getWalkerManager();
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
|
|
@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
|
|||
throw new ArgumentException("Walker does not allow a reference but one was provided.");
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
|
||||
* 'allowed' is still present.
|
||||
*
|
||||
* @param rods Reference-ordered data to load.
|
||||
*/
|
||||
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
|
||||
// Check to make sure that all required metadata is present.
|
||||
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
|
||||
for (RMD required : allRequired) {
|
||||
boolean found = false;
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (rod.matchesNameAndRecordType(required.name(), required.type()))
|
||||
found = true;
|
||||
}
|
||||
if (!found)
|
||||
throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
|
||||
"Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
|
||||
}
|
||||
|
||||
// Check to see that no forbidden rods are present.
|
||||
for (ReferenceOrderedDataSource rod : rods) {
|
||||
if (!WalkerManager.isAllowed(walker, rod))
|
||||
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
|
||||
}
|
||||
}
|
||||
|
||||
protected void validateSuppliedIntervals() {
|
||||
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
|
||||
if(!(walker instanceof ReadWalker)) {
|
||||
|
|
@ -716,8 +689,6 @@ public class GenomeAnalysisEngine {
|
|||
validateSuppliedReads();
|
||||
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
|
||||
|
||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||
|
||||
for (ReadFilter filter : filters)
|
||||
filter.initialize(this);
|
||||
|
||||
|
|
@ -926,9 +897,6 @@ public class GenomeAnalysisEngine {
|
|||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
|
||||
// try and make the tracks given their requests
|
||||
// create of live instances of the tracks
|
||||
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
|
||||
|
||||
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
|
||||
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
|
||||
|
|
@ -939,7 +907,6 @@ public class GenomeAnalysisEngine {
|
|||
flashbackData()));
|
||||
|
||||
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
|
||||
validateSuppliedReferenceOrderedData(dataSources);
|
||||
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
|
||||
|
||||
return dataSources;
|
||||
|
|
@ -994,7 +961,7 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
/**
|
||||
* Get the list of intervals passed to the engine.
|
||||
* @return List of intervals.
|
||||
* @return List of intervals, or null if no intervals are in use
|
||||
*/
|
||||
public GenomeLocSortedSet getIntervals() {
|
||||
return this.intervals;
|
||||
|
|
|
|||
|
|
@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
|
|||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
|
||||
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
|
||||
import org.broadinstitute.sting.utils.help.SummaryTaglet;
|
||||
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
|
@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return A suitable display name for the package.
|
||||
*/
|
||||
public String getPackageDisplayName(String packageName) {
|
||||
// Try to find an override for the display name of this package.
|
||||
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
|
||||
String displayName;
|
||||
if(helpText.containsKey(displayNameKey)) {
|
||||
displayName = helpText.getString(displayNameKey);
|
||||
}
|
||||
else {
|
||||
// If no override exists...
|
||||
// ...try to compute the override from the text of the package name, while accounting for
|
||||
// unpackaged walkers.
|
||||
displayName = packageName.substring(packageName.lastIndexOf('.')+1);
|
||||
if(displayName.trim().equals("")) displayName = "<unpackaged>";
|
||||
}
|
||||
// ...try to compute the override from the text of the package name, while accounting for
|
||||
// unpackaged walkers.
|
||||
String displayName = packageName.substring(packageName.lastIndexOf('.')+1);
|
||||
if (displayName.trim().equals("")) displayName = "<unpackaged>";
|
||||
return displayName;
|
||||
}
|
||||
|
||||
|
|
@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return Package help text, or "" if none exists.
|
||||
*/
|
||||
public String getPackageSummaryText(String packageName) {
|
||||
String key = String.format("%s.%s",packageName,SummaryTaglet.NAME);
|
||||
String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
|
||||
if(!helpText.containsKey(key))
|
||||
return "";
|
||||
return helpText.getString(key);
|
||||
|
|
@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return Walker summary description, or "" if none exists.
|
||||
*/
|
||||
public String getWalkerSummaryText(Class<? extends Walker> walkerType) {
|
||||
String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME);
|
||||
String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
|
||||
if(!helpText.containsKey(walkerSummary))
|
||||
return "";
|
||||
return helpText.getString(walkerSummary);
|
||||
|
|
@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return Walker full description, or "" if none exists.
|
||||
*/
|
||||
public String getWalkerDescriptionText(Class<? extends Walker> walkerType) {
|
||||
String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME);
|
||||
String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME);
|
||||
if(!helpText.containsKey(walkerDescription))
|
||||
return "";
|
||||
return helpText.getString(walkerDescription);
|
||||
|
|
@ -188,19 +177,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return The list of allowed reference meta data.
|
||||
*/
|
||||
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
|
||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
||||
if (allowsDataSource == null)
|
||||
return Collections.<RMD>emptyList();
|
||||
return Arrays.asList(allowsDataSource.referenceMetaData());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a list of RODs allowed by the walker.
|
||||
* @param walker Walker to query.
|
||||
* @return The list of allowed reference meta data.
|
||||
*/
|
||||
public static List<RMD> getAllowsMetaData(Walker walker) {
|
||||
return getAllowsMetaData(walker.getClass());
|
||||
return Collections.<RMD>emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -237,24 +214,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return True if the walker forbids this data type. False otherwise.
|
||||
*/
|
||||
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
|
||||
Allows allowsDataSource = getWalkerAllowed(walkerClass);
|
||||
|
||||
// Allows is less restrictive than requires. If an allows
|
||||
// clause is not specified, any kind of data is allowed.
|
||||
if( allowsDataSource == null )
|
||||
return true;
|
||||
|
||||
// The difference between unspecified RMD and the empty set of metadata can't be detected.
|
||||
// Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
|
||||
// case in the future.
|
||||
if( allowsDataSource.referenceMetaData().length == 0 )
|
||||
return true;
|
||||
|
||||
for( RMD allowed: allowsDataSource.referenceMetaData() ) {
|
||||
if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -294,8 +254,7 @@ public class WalkerManager extends PluginManager<Walker> {
|
|||
* @return The list of required reference meta data.
|
||||
*/
|
||||
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
|
||||
Requires requiresDataSource = getWalkerRequirements(walkerClass);
|
||||
return Arrays.asList(requiresDataSource.referenceMetaData());
|
||||
return Collections.emptyList();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -23,8 +23,26 @@
|
|||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.datasources.reference;
|
||||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.*;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class DbsnpArgumentCollection {
|
||||
|
||||
/**
|
||||
* A dbSNP VCF file.
|
||||
*/
|
||||
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
|
||||
public RodBinding<VariantContext> dbsnp;
|
||||
|
||||
public interface ReferenceDataSourceProgressListener {
|
||||
public void percentProgress(int percent);
|
||||
}
|
||||
|
||||
|
|
@ -101,6 +101,8 @@ public class GATKArgumentCollection {
|
|||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
public File referenceFile = null;
|
||||
|
||||
@Deprecated
|
||||
@Hidden
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||
|
|
@ -117,11 +119,6 @@ public class GATKArgumentCollection {
|
|||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||
public boolean nonDeterministicRandomSeed = false;
|
||||
|
||||
|
||||
@Element(required = false)
|
||||
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
|
||||
public String DBSNPFile = null;
|
||||
|
||||
/**
|
||||
* The override mechanism in the GATK, by default, populates the command-line arguments, then
|
||||
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
|
||||
|
|
@ -345,14 +342,6 @@ public class GATKArgumentCollection {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
if (other.RODBindings.size() != RODBindings.size()) {
|
||||
return false;
|
||||
}
|
||||
for (int x = 0; x < RODBindings.size(); x++) {
|
||||
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (!other.samFiles.equals(this.samFiles)) {
|
||||
return false;
|
||||
}
|
||||
|
|
@ -380,9 +369,6 @@ public class GATKArgumentCollection {
|
|||
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.unsafe.equals(this.unsafe)) {
|
||||
return false;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,49 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.Root;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class StandardVariantContextInputArgumentCollection {
|
||||
|
||||
/**
|
||||
* Variants from this VCF file are used by this tool as input.
|
||||
* The file must at least contain the standard VCF header lines, but
|
||||
* can be empty (i.e., no variants are contained in the file).
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
}
|
||||
|
||||
|
|
@ -1,8 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
|
|||
* @param loc Locus at which to track.
|
||||
* @return A tracker containing information about this locus.
|
||||
*/
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size());
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
|
||||
|
||||
for ( ReferenceOrderedDataState state: states )
|
||||
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
|
||||
return tracks;
|
||||
// todo -- warning, I removed the reference to the name from states
|
||||
bindings.add( state.iterator.seekForward(loc) );
|
||||
|
||||
return new RefMetaDataTracker(bindings, referenceContext);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
public interface ReferenceOrderedView extends View {
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
|
||||
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
|
|
@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
*/
|
||||
private RODMergingIterator rodQueue = null;
|
||||
|
||||
RefMetaDataTracker tracker = null;
|
||||
Collection<RODRecordList> allTracksHere;
|
||||
|
||||
GenomeLoc lastLoc = null;
|
||||
RODRecordList interval = null;
|
||||
|
||||
|
|
@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
}
|
||||
|
||||
rodQueue = new RODMergingIterator(iterators);
|
||||
|
||||
//throw new StingException("RodLocusView currently disabled");
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
|
||||
return tracker;
|
||||
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { allTracksHere.add(interval); }
|
||||
return new RefMetaDataTracker(allTracksHere, referenceContext);
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
|
|
@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
|
||||
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
|
||||
|
||||
// Update the tracker here for use
|
||||
Collection<RODRecordList> allTracksHere = getSpanningTracks(datum);
|
||||
tracker = createTracker(allTracksHere);
|
||||
|
||||
allTracksHere = getSpanningTracks(datum);
|
||||
GenomeLoc rodSite = datum.getLocation();
|
||||
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
|
||||
|
||||
|
|
@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
|
||||
}
|
||||
|
||||
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
|
||||
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
|
||||
for ( RODRecordList track : allTracksHere ) {
|
||||
if ( ! t.hasROD(track.getName()) )
|
||||
t.bind(track.getName(), track);
|
||||
}
|
||||
|
||||
// special case the interval again -- add it into the ROD
|
||||
if ( interval != null ) { t.bind(interval.getName(), interval); }
|
||||
|
||||
return t;
|
||||
}
|
||||
|
||||
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
|
||||
return rodQueue.allElementsLTE(marker);
|
||||
}
|
||||
|
|
@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
return getSkippedBases(getLocOneBeyondShard());
|
||||
}
|
||||
|
||||
public RefMetaDataTracker getTracker() {
|
||||
return tracker;
|
||||
}
|
||||
|
||||
/**
|
||||
* Closes the current view.
|
||||
*/
|
||||
|
|
@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
|
|||
state.dataSource.close( state.iterator );
|
||||
|
||||
rodQueue = null;
|
||||
tracker = null;
|
||||
allTracksHere = null;
|
||||
}
|
||||
}
|
||||
|
|
@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
*/
|
||||
public FilePointer next() {
|
||||
FilePointer current = wrappedIterator.next();
|
||||
//while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
||||
// current = current.combine(parser,wrappedIterator.next());
|
||||
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
||||
current = current.combine(parser,wrappedIterator.next());
|
||||
return current;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -893,6 +893,7 @@ public class SAMDataSource {
|
|||
* Custom representation of interval bounds.
|
||||
* Makes it simpler to track current position.
|
||||
*/
|
||||
private int[] intervalContigIndices;
|
||||
private int[] intervalStarts;
|
||||
private int[] intervalEnds;
|
||||
|
||||
|
|
@ -917,12 +918,14 @@ public class SAMDataSource {
|
|||
if(foundMappedIntervals) {
|
||||
if(keepOnlyUnmappedReads)
|
||||
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
|
||||
this.intervalContigIndices = new int[intervals.size()];
|
||||
this.intervalStarts = new int[intervals.size()];
|
||||
this.intervalEnds = new int[intervals.size()];
|
||||
int i = 0;
|
||||
for(GenomeLoc interval: intervals) {
|
||||
intervalStarts[i] = (int)interval.getStart();
|
||||
intervalEnds[i] = (int)interval.getStop();
|
||||
intervalContigIndices[i] = interval.getContigIndex();
|
||||
intervalStarts[i] = interval.getStart();
|
||||
intervalEnds[i] = interval.getStop();
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
|
@ -961,11 +964,10 @@ public class SAMDataSource {
|
|||
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
|
||||
if(!keepOnlyUnmappedReads) {
|
||||
// Mapped read filter; check against GenomeLoc-derived bounds.
|
||||
if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
||||
(candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
|
||||
// This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
|
||||
if(readEndsOnOrAfterStartingBound(candidateRead)) {
|
||||
// This read ends after the current interval begins.
|
||||
// Promising, but this read must be checked against the ending bound.
|
||||
if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
|
||||
if(readStartsOnOrBeforeEndingBound(candidateRead)) {
|
||||
// Yes, this read is within both bounds. This must be our next read.
|
||||
nextRead = candidateRead;
|
||||
break;
|
||||
|
|
@ -993,6 +995,37 @@ public class SAMDataSource {
|
|||
candidateRead = iterator.next();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the read lies after the start of the current bound. If the read is unmapped but placed, its
|
||||
* end will be distorted, so rely only on the alignment start.
|
||||
* @param read The read to position-check.
|
||||
* @return True if the read starts after the current bounds. False otherwise.
|
||||
*/
|
||||
private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
|
||||
return
|
||||
// Read ends on a later contig, or...
|
||||
read.getReferenceIndex() > intervalContigIndices[currentBound] ||
|
||||
// Read ends of this contig...
|
||||
(read.getReferenceIndex() == intervalContigIndices[currentBound] &&
|
||||
// either after this location, or...
|
||||
(read.getAlignmentEnd() >= intervalStarts[currentBound] ||
|
||||
// read is unmapped but positioned and alignment start is on or after this start point.
|
||||
(read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
|
||||
}
|
||||
|
||||
/**
|
||||
* Check whether the read lies before the end of the current bound.
|
||||
* @param read The read to position-check.
|
||||
* @return True if the read starts after the current bounds. False otherwise.
|
||||
*/
|
||||
private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
|
||||
return
|
||||
// Read starts on a prior contig, or...
|
||||
read.getReferenceIndex() < intervalContigIndices[currentBound] ||
|
||||
// Read starts on this contig and the alignment start is registered before this end point.
|
||||
(read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ import java.io.File;
|
|||
* Loads reference data from fasta file
|
||||
* Looks for fai and dict files, and tries to create them if they don't exist
|
||||
*/
|
||||
public class ReferenceDataSource implements ReferenceDataSourceProgressListener {
|
||||
public class ReferenceDataSource {
|
||||
private IndexedFastaSequenceFile index;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
|
|
@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
|||
// get exclusive lock
|
||||
if (!indexLock.exclusiveLock())
|
||||
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
|
||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this);
|
||||
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
|
||||
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
|
||||
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
|
||||
}
|
||||
|
|
@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
|
|||
public IndexedFastaSequenceFile getReference() {
|
||||
return this.index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Notify user of progress in creating fai file
|
||||
* @param percent Percent of fasta file read as a percent
|
||||
*/
|
||||
public void percentProgress(int percent) {
|
||||
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
|
|||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
|
|||
}
|
||||
|
||||
public Class getType() {
|
||||
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
|
||||
}
|
||||
|
||||
public Class getRecordType() {
|
||||
return builder.createCodec(getType(),getName()).getFeatureType();
|
||||
return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
|
||||
}
|
||||
|
||||
public File getFile() {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,82 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.examples;
|
||||
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.ArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
*
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class GATKDocsExample extends RodWalker<Integer, Integer> {
|
||||
/**
|
||||
* Put detailed documentation about the argument here. No need to duplicate the summary information
|
||||
* in doc annotation field, as that will be added before this text in the documentation page.
|
||||
*
|
||||
* Notes:
|
||||
* <ul>
|
||||
* <li>This field can contain HTML as a normal javadoc</li>
|
||||
* <li>Don't include information about the default value, as gatkdocs adds this automatically</li>
|
||||
* <li>Try your best to describe in detail the behavior of the argument, as ultimately confusing
|
||||
* docs here will just result in user posts on the forum</li>
|
||||
* </ul>
|
||||
*/
|
||||
@Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false)
|
||||
private boolean myWalkerArgument = false;
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer value, Integer sum) { return value + sum; }
|
||||
public void onTraversalDone(Integer result) { }
|
||||
}
|
||||
|
|
@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
if (!( walker instanceof TreeReducible ))
|
||||
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
|
||||
|
||||
traversalEngine.startTimers();
|
||||
ReduceTree reduceTree = new ReduceTree(this);
|
||||
initializeWalker(walker);
|
||||
|
||||
|
|
|
|||
|
|
@ -44,15 +44,16 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
* @param shardStrategy A strategy for sharding the data.
|
||||
*/
|
||||
public Object execute(Walker walker, ShardStrategy shardStrategy) {
|
||||
traversalEngine.startTimers();
|
||||
walker.initialize();
|
||||
Accumulator accumulator = Accumulator.create(engine,walker);
|
||||
|
||||
boolean done = walker.isDone();
|
||||
int counter = 0;
|
||||
for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) {
|
||||
if ( shard == null ) // we ran out of shards that aren't owned
|
||||
for (Shard shard : shardStrategy ) {
|
||||
if ( done || shard == null ) // we ran out of shards that aren't owned
|
||||
break;
|
||||
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
||||
|
|
@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
if ( walker.isDone() ) break;
|
||||
}
|
||||
windowMaker.close();
|
||||
}
|
||||
|
|
@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
accumulator.accumulate(dataProvider,result);
|
||||
dataProvider.close();
|
||||
}
|
||||
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
Object result = accumulator.finishTraversal();
|
||||
|
|
|
|||
|
|
@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*;
|
|||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.threading.*;
|
||||
|
||||
import javax.management.JMException;
|
||||
import javax.management.MBeanServer;
|
||||
import javax.management.ObjectName;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.util.Collection;
|
||||
|
||||
|
|
@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
private final MBeanServer mBeanServer;
|
||||
private final ObjectName mBeanName;
|
||||
|
||||
protected GenomeLocProcessingTracker processingTracker;
|
||||
|
||||
/**
|
||||
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
|
||||
* selected walker.
|
||||
|
|
@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
* @return The best-fit microscheduler.
|
||||
*/
|
||||
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
|
||||
if (engine.getArguments().processingTrackerFile != null) {
|
||||
if ( walker instanceof ReadWalker )
|
||||
throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers"));
|
||||
}
|
||||
|
||||
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
|
||||
if(walker.isReduceByInterval())
|
||||
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
|
||||
|
|
@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
|
|||
catch (JMException ex) {
|
||||
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
|
||||
}
|
||||
|
||||
//
|
||||
// create the processing tracker
|
||||
//
|
||||
if ( engine.getArguments().processingTrackerFile != null ) {
|
||||
logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably.");
|
||||
if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) {
|
||||
engine.getArguments().processingTrackerFile.delete();
|
||||
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
|
||||
}
|
||||
|
||||
PrintStream statusStream = null;
|
||||
if ( engine.getArguments().processingTrackerStatusFile != null ) {
|
||||
try {
|
||||
statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
|
||||
} catch ( FileNotFoundException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID);
|
||||
processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ;
|
||||
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID);
|
||||
} else {
|
||||
// create a NoOp version that doesn't do anything but say "yes"
|
||||
processingTracker = new NoOpGenomeLocProcessingTracker();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
|
|||
|
||||
public Object call() {
|
||||
try {
|
||||
traversalEngine.startTimersIfNecessary();
|
||||
long startTime = System.currentTimeMillis();
|
||||
|
||||
Object accumulator = walker.reduceInit();
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* Filter out FailsVendorQualityCheck reads.
|
||||
*/
|
||||
|
||||
public class FailsVendorQualityCheckReadFilter extends ReadFilter {
|
||||
public class FailsVendorQualityCheckFilter extends ReadFilter {
|
||||
public boolean filterOut( final SAMRecord read ) {
|
||||
return read.getReadFailsVendorQualityCheckFlag();
|
||||
}
|
||||
|
|
@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
|
|||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class MappingQualityReadFilter extends ReadFilter {
|
||||
public class MappingQualityFilter extends ReadFilter {
|
||||
|
||||
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
|
||||
public int MIN_MAPPING_QUALTY_SCORE = 10;
|
||||
|
|
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
|
|||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class MappingQualityUnavailableReadFilter extends ReadFilter {
|
||||
public class MappingQualityUnavailableFilter extends ReadFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
|
||||
}
|
||||
|
|
@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* @version 0.1
|
||||
*/
|
||||
|
||||
public class MappingQualityZeroReadFilter extends ReadFilter {
|
||||
public class MappingQualityZeroFilter extends ReadFilter {
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
return (rec.getMappingQuality() == 0);
|
||||
}
|
||||
|
|
@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
|
|||
* Filter out duplicate reads.
|
||||
*/
|
||||
|
||||
public class NotPrimaryAlignmentReadFilter extends ReadFilter {
|
||||
public class NotPrimaryAlignmentFilter extends ReadFilter {
|
||||
public boolean filterOut( final SAMRecord read ) {
|
||||
return read.getNotPrimaryAlignmentFlag();
|
||||
}
|
||||
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class PlatformFilter extends ReadFilter {
|
||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
|
||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
|
||||
protected String[] PLFilterNames;
|
||||
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
|
|
|
|||
|
|
@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters;
|
|||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
/**
|
||||
* A SamRecordFilter that also depends on the header.
|
||||
*/
|
||||
@DocumentedGATKFeature(
|
||||
groupName = "Read filters",
|
||||
summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" )
|
||||
public abstract class ReadFilter implements SamRecordFilter {
|
||||
/**
|
||||
* Sets the header for use by this filter.
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* A read filter (transformer) that sets all reads mapping quality to a given value.
|
||||
*
|
||||
* <p>
|
||||
* If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set
|
||||
* all your mapping qualities to a given value. Default being 60.
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* BAM file(s)
|
||||
* </p>
|
||||
*
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* BAM file(s) with all reads mapping qualities reassigned
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -rf ReassignMappingQuality
|
||||
* -DMQ 35
|
||||
* </pre>
|
||||
*
|
||||
* @author carneiro
|
||||
* @since 8/8/11
|
||||
*/
|
||||
|
||||
public class ReassignMappingQualityFilter extends ReadFilter {
|
||||
|
||||
@Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
|
||||
public int defaultMappingQuality = 60;
|
||||
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
rec.setMappingQuality(defaultMappingQuality);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
writer.writeHeader(stub.getVCFHeader());
|
||||
}
|
||||
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
writer.add(vc, ref);
|
||||
public void add(VariantContext vc) {
|
||||
writer.add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
|
||||
|
||||
for ( VariantContext vc : source.iterator() ) {
|
||||
target.writer.add(vc, vc.getReferenceBaseForIndel());
|
||||
target.writer.add(vc);
|
||||
}
|
||||
|
||||
source.close();
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations.
|
||||
|
|
@ -69,16 +70,21 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) {
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
return "stdout";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||
if(!source.isRequired())
|
||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
|
||||
engine.addOutput(stub);
|
||||
return createInstanceOfClass(type,stub);
|
||||
return createInstanceOfClass((Class)type,stub);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
|
||||
String fileName = getArgumentValue( definition, matches );
|
||||
|
||||
|
|
@ -91,7 +97,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
engine.addOutput(stub);
|
||||
|
||||
Object result = createInstanceOfClass(type,stub);
|
||||
Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
|
||||
// WARNING: Side effects required by engine!
|
||||
parsingEngine.addTags(result,getArgumentTags(matches));
|
||||
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.Type;
|
||||
|
||||
/**
|
||||
* Describe how to parse SAMFileReaders.
|
||||
|
|
@ -52,14 +53,13 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
this.engine = engine;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return SAMFileReader.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
|
||||
|
||||
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.annotation.Annotation;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -93,7 +94,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
return "stdout";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||
if(!source.isRequired())
|
||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
|
||||
|
|
@ -102,7 +108,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
// Extract all possible parameters that could be passed to a BAM file writer?
|
||||
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
|
||||
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
|
||||
import java.io.File;
|
||||
import java.io.OutputStream;
|
||||
import java.lang.reflect.Type;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.HashSet;
|
||||
|
|
@ -108,7 +109,12 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
|
||||
public String typeDefaultDocString(ArgumentSource source) {
|
||||
return "stdout";
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
|
||||
if(!source.isRequired())
|
||||
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
|
||||
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
|
||||
|
|
@ -124,7 +130,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
* @return Transform from the matches into the associated argument.
|
||||
*/
|
||||
@Override
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
|
||||
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
|
||||
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
|
||||
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
|
||||
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
|
|
@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
vcfHeader = header;
|
||||
|
||||
// Check for the command-line argument header line. If not present, add it in.
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
|
||||
foundCommandLineHeaderLine = true;
|
||||
if ( !skipWritingHeader ) {
|
||||
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
|
||||
boolean foundCommandLineHeaderLine = false;
|
||||
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
|
||||
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
|
||||
foundCommandLineHeaderLine = true;
|
||||
}
|
||||
if ( !foundCommandLineHeaderLine )
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
// also put in the reference contig header lines
|
||||
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
|
||||
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
|
||||
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
|
||||
|
||||
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
|
||||
}
|
||||
if(!foundCommandLineHeaderLine && !skipWritingHeader)
|
||||
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
|
||||
|
||||
outputTracker.getStorage(this).writeHeader(vcfHeader);
|
||||
}
|
||||
|
|
@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
/**
|
||||
* @{inheritDoc}
|
||||
*/
|
||||
public void add(VariantContext vc, byte ref) {
|
||||
outputTracker.getStorage(this).add(vc,ref);
|
||||
public void add(VariantContext vc) {
|
||||
outputTracker.getStorage(this).add(vc);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
|
||||
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
|
||||
}
|
||||
|
||||
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
|
||||
String val;
|
||||
if ( assembly != null )
|
||||
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
|
||||
else
|
||||
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
|
||||
return new VCFHeaderLine("contig", val);
|
||||
}
|
||||
|
||||
private String getReferenceAssembly(String refPath) {
|
||||
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
|
||||
String assembly = null;
|
||||
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
|
||||
assembly = "b37";
|
||||
else if ( refPath.indexOf("b36") != -1 )
|
||||
assembly = "b36";
|
||||
else if ( refPath.indexOf("hg18") != -1 )
|
||||
assembly = "hg18";
|
||||
else if ( refPath.indexOf("hg19") != -1 )
|
||||
assembly = "hg19";
|
||||
return assembly;
|
||||
}
|
||||
}
|
||||
|
|
@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format;
|
|||
import org.simpleframework.xml.stream.HyphenStyle;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.InetAddress;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
|
|
@ -154,9 +153,13 @@ public class GATKRunReport {
|
|||
private long nReads;
|
||||
|
||||
public enum PhoneHomeOption {
|
||||
/** Disable phone home */
|
||||
NO_ET,
|
||||
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
|
||||
STANDARD,
|
||||
/** Force output to STDOUT. For debugging only */
|
||||
STDOUT,
|
||||
/** Force output to S3. For debugging only */
|
||||
AWS_S3 // todo -- remove me -- really just for testing purposes
|
||||
}
|
||||
|
||||
|
|
@ -226,22 +229,6 @@ public class GATKRunReport {
|
|||
}
|
||||
|
||||
|
||||
/**
|
||||
* Helper utility that calls into the InetAddress system to resolve the hostname. If this fails,
|
||||
* unresolvable gets returned instead.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
private String resolveHostname() {
|
||||
try {
|
||||
return InetAddress.getLocalHost().getCanonicalHostName();
|
||||
}
|
||||
catch (java.net.UnknownHostException uhe) { // [beware typo in code sample -dmw]
|
||||
return "unresolvable";
|
||||
// handle exception
|
||||
}
|
||||
}
|
||||
|
||||
public void postReport(PhoneHomeOption type) {
|
||||
logger.debug("Posting report of type " + type);
|
||||
switch (type) {
|
||||
|
|
@ -321,7 +308,7 @@ public class GATKRunReport {
|
|||
|
||||
private void postReportToAWSS3() {
|
||||
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
|
||||
this.hostName = resolveHostname(); // we want to fill in the host name
|
||||
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
|
||||
File localFile = postReportToLocalDisk(new File("./"));
|
||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||
if ( localFile != null ) { // we succeeded in creating the local file
|
||||
|
|
|
|||
|
|
@ -1,238 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.lang.reflect.Constructor;
|
||||
import java.util.Iterator;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
|
||||
* files. The only assumption made is that every line in the file provides a complete and separate data record. The records
|
||||
* can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
|
||||
* position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
|
||||
* otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
|
||||
* provided that is capable of parsing itself from a single line of data. This implementation will be used,
|
||||
* through reflection mechanism, as a callback to do all the work.
|
||||
*
|
||||
* The model is, hence, as follows:
|
||||
*
|
||||
* String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
|
||||
* an instance of RodImplementation with data values from dataRecord line).
|
||||
*
|
||||
*
|
||||
* instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
|
||||
* that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
|
||||
* representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
|
||||
* track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
|
||||
* multiple streams of (possibly heterogeneous) annotation data bound to an application.
|
||||
*
|
||||
* This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
|
||||
* to the ROD implementation class for processing.
|
||||
*
|
||||
* This iterator does not actually check if the ROD records (lines) in the file are indeed ordedered by coordinate,
|
||||
* and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
|
||||
* iterators/wrappers will perform all the necessary checks.
|
||||
*
|
||||
* Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
|
||||
* mechanism is as follows:
|
||||
* 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
|
||||
* and return the header object (to be kept by the iterator)
|
||||
* 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
|
||||
* and
|
||||
* 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
|
||||
* method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
|
||||
* other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
|
||||
*
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: asivache
|
||||
* Date: Sep 10, 2009
|
||||
* Time: 1:22:23 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
|
||||
|
||||
private PushbackIterator<String> reader;
|
||||
|
||||
// stores name of the track this iterator reads (will be also returned by getName() of ROD objects
|
||||
// generated by this iterator)
|
||||
private String name;
|
||||
|
||||
// we keep the file object, only to use file name in error reports
|
||||
private File file;
|
||||
|
||||
// rod type; this is what we will instantiate for RODs at runtime
|
||||
private Class<ROD> type;
|
||||
|
||||
private Object header = null; // Some RODs may use header
|
||||
|
||||
// field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
|
||||
private String fieldDelimiter;
|
||||
|
||||
// constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
|
||||
private Constructor<ROD> named_constructor;
|
||||
|
||||
// keep track of the lines we are reading. used for error messages only.
|
||||
private long linenum = 0;
|
||||
|
||||
private boolean allow_empty = true;
|
||||
private boolean allow_comments = true;
|
||||
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
|
||||
|
||||
public RODRecordIterator(File file, String name, Class<ROD> type) {
|
||||
try {
|
||||
reader = new PushbackIterator<String>(new XReadLines(file));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
this.file = file;
|
||||
this.name = name;
|
||||
this.type = type;
|
||||
try {
|
||||
named_constructor = type.getConstructor(String.class);
|
||||
}
|
||||
catch (java.lang.NoSuchMethodException e) {
|
||||
throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
|
||||
}
|
||||
ROD rod = instantiateROD(name);
|
||||
fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
|
||||
try {
|
||||
header = rod.initialize(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns <tt>true</tt> if the iteration has more elements. (In other
|
||||
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
|
||||
* rather than throwing an exception.)
|
||||
*
|
||||
* @return <tt>true</tt> if the iterator has more elements.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
if ( allow_empty || allow_comments ) {
|
||||
while ( reader.hasNext() ) {
|
||||
String line = reader.next();
|
||||
if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
|
||||
if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
|
||||
// the line is not empty and not a comment line, so we have next after all
|
||||
reader.pushback(line);
|
||||
return true;
|
||||
}
|
||||
return false; // oops, we end up here if there's nothing left
|
||||
} else {
|
||||
return reader.hasNext();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the next valid ROD record in the file, skipping empty and comment lines.
|
||||
*
|
||||
* @return the next element in the iteration.
|
||||
* @throws java.util.NoSuchElementException
|
||||
* iteration has no more elements.
|
||||
*/
|
||||
public ROD next() {
|
||||
ROD n = null;
|
||||
boolean parsed_ok = false;
|
||||
String line ;
|
||||
|
||||
while ( ! parsed_ok && reader.hasNext() ) {
|
||||
line = reader.next();
|
||||
linenum++;
|
||||
while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
|
||||
allow_comments && line.charAt(0) == '#' ) {
|
||||
if ( reader.hasNext() ) {
|
||||
line = reader.next();
|
||||
linenum++;
|
||||
} else {
|
||||
line = null;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
|
||||
|
||||
String parts[] = line.split(fieldDelimiter);
|
||||
|
||||
try {
|
||||
n = instantiateROD(name);
|
||||
parsed_ok = n.parseLine(header,parts) ;
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
|
||||
"\nOffending line: "+line+
|
||||
"\nReason ("+e.getClass().getName()+")", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
/**
|
||||
* Removes from the underlying collection the last element returned by the
|
||||
* iterator (optional operation). This method can be called only once per
|
||||
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
|
||||
* the underlying collection is modified while the iteration is in
|
||||
* progress in any way other than by calling this method.
|
||||
*
|
||||
* @throws UnsupportedOperationException if the <tt>remove</tt>
|
||||
* operation is not supported by this Iterator.
|
||||
* @throws IllegalStateException if the <tt>next</tt> method has not
|
||||
* yet been called, or the <tt>remove</tt> method has already
|
||||
* been called after the last call to the <tt>next</tt>
|
||||
* method.
|
||||
*/
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
|
||||
}
|
||||
|
||||
/** Instantiates appropriate implementation of the ROD used by this iteratot. The 'name' argument is the name
|
||||
* of the ROD track.
|
||||
* @param name
|
||||
* @return
|
||||
*/
|
||||
private ROD instantiateROD(final String name) {
|
||||
try {
|
||||
return (ROD) named_constructor.newInstance(name);
|
||||
} catch (Exception e) {
|
||||
throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,13 +1,15 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -18,348 +20,402 @@ import java.util.*;
|
|||
* The standard interaction model is:
|
||||
*
|
||||
* Traversal system arrives at a site, which has a bunch of RMDs covering it
|
||||
Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs
|
||||
* Traversal passes tracker to the walker
|
||||
* walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was
|
||||
* bound at this site.
|
||||
* Traversal passes creates a tracker and passes it to the walker
|
||||
* walker calls get(rodBinding) to obtain the RMDs values at this site for the track
|
||||
* associated with rodBinding.
|
||||
*
|
||||
* Note that this is an immutable class. Once created the underlying data structures
|
||||
* cannot be modified
|
||||
*
|
||||
* User: mdepristo
|
||||
* Date: Apr 3, 2009
|
||||
* Time: 3:05:23 PM
|
||||
*/
|
||||
public class RefMetaDataTracker {
|
||||
// TODO: this should be a list, not a map, actually
|
||||
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
|
||||
|
||||
final Map<String, RODRecordList> map;
|
||||
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
final ReferenceContext ref;
|
||||
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
|
||||
|
||||
public RefMetaDataTracker(int nBindings) {
|
||||
if ( nBindings == 0 )
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Special ENGINE interaction functions
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
|
||||
this.ref = ref;
|
||||
|
||||
// set up the map
|
||||
if ( allBindings.isEmpty() )
|
||||
map = Collections.emptyMap();
|
||||
else
|
||||
map = new HashMap<String, RODRecordList>(nBindings);
|
||||
else {
|
||||
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
|
||||
for ( RODRecordList rod : allBindings ) {
|
||||
if ( rod != null && ! rod.isEmpty() )
|
||||
tmap.put(canonicalName(rod.getName()), rod);
|
||||
}
|
||||
|
||||
// ensure that no one modifies the map itself
|
||||
map = Collections.unmodifiableMap(tmap);
|
||||
}
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Generic accessors
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Gets all of the Tribble features spanning this locus, returning them as a list of specific
|
||||
* type T extending Feature. This function looks across all tracks to find the Features, so
|
||||
* if you have two tracks A and B each containing 1 Feature, then getValues will return
|
||||
* a list containing both features.
|
||||
*
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case. If you want
|
||||
* to get all Features without any danger of such an exception use the root Tribble
|
||||
* interface Feature.
|
||||
*
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"type != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP RMD this will be a RodDbSNP, etc.
|
||||
* Provides the same functionality as @link #getValues(Class<T>) but will only include
|
||||
* Features that start as the GenomeLoc provide onlyAtThisLoc.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param onlyAtThisLoc
|
||||
* @param <T> as above
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
public List<Object> getReferenceMetaData(final String name) {
|
||||
RODRecordList list = getTrackDataByName(name, true);
|
||||
List<Object> objects = new ArrayList<Object>();
|
||||
if (list == null) return objects;
|
||||
for (GATKFeature feature : list)
|
||||
objects.add(feature.getUnderlyingObject());
|
||||
return objects;
|
||||
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the reference meta data associated with a track name.
|
||||
* @param name the name of the track we're looking for
|
||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
||||
* the passed in parameter (false).
|
||||
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
|
||||
* dbSNP rod this will be a RodDbSNP, etc.
|
||||
* Uses the same logic as @link #getValues(Class) but arbitrary select one of the resulting
|
||||
* elements of the list to return. That is, if there would be two elements in the result of
|
||||
* @link #getValues(Class), one of these two is selected, and which one it will be isn't
|
||||
* specified. Consequently, this method is only really safe if (1) you absolutely know
|
||||
* that only one binding will meet the constraints of @link #getValues(Class) or (2)
|
||||
* you truly don't care which of the multiple bindings available you are going to examine.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* If there are no bindings here, getFirstValue() return null
|
||||
*
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @return A random single element the RODs bound here, or null if none are bound.
|
||||
*/
|
||||
public List<Object> getReferenceMetaData(final String name, boolean requireExactMatch) {
|
||||
RODRecordList list = getTrackDataByName(name, requireExactMatch);
|
||||
List<Object> objects = new ArrayList<Object>();
|
||||
if (list == null) return objects;
|
||||
for (GATKFeature feature : list)
|
||||
objects.add(feature.getUnderlyingObject());
|
||||
return objects;
|
||||
@Requires({"type != null"})
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type) {
|
||||
return safeGetFirst(getValues(type));
|
||||
}
|
||||
|
||||
/**
|
||||
* get all the GATK features associated with a specific track name
|
||||
* @param name the name of the track we're looking for
|
||||
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
|
||||
* the passed in parameter (false).
|
||||
* @return a list of GATKFeatures for the target rmd
|
||||
* Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list
|
||||
* of eligible Features and @link #getFirstValue(Class) to select a single
|
||||
* element from the interval list.
|
||||
*
|
||||
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
|
||||
* @param type The type of the underlying objects bound here
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
|
||||
*/
|
||||
public List<GATKFeature> getGATKFeatureMetaData(final String name, boolean requireExactMatch) {
|
||||
List<GATKFeature> feat = getTrackDataByName(name,requireExactMatch);
|
||||
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null
|
||||
@Requires({"type != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, onlyAtThisLoc));
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* get a singleton record, given the name and a type. This function will return the first record at the current position seen,
|
||||
* and emit a logger warning if there were more than one option.
|
||||
* Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
|
||||
* a list of specific type T extending Feature.
|
||||
*
|
||||
* WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are
|
||||
* are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
|
||||
* picked may change from time to time! BE WARNED!
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param clazz the underlying type to return
|
||||
* @param <T> the type to parameterize on, matching the clazz argument
|
||||
* @return a record of type T, or null if no record is present.
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Deprecated
|
||||
public <T> T lookup(final String name, Class<T> clazz) {
|
||||
RODRecordList objects = getTrackDataByName(name, true);
|
||||
@Requires({"rodBinding != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding) {
|
||||
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), null, false, false);
|
||||
}
|
||||
|
||||
// if emtpy or null return null;
|
||||
if (objects == null || objects.size() < 1) return null;
|
||||
/**
|
||||
* Gets all of the Tribble features bound to any RodBinding in rodBindings,
|
||||
* spanning this locus, returning them as a list of specific type T extending Feature.
|
||||
*
|
||||
* Note that this function assumes that all of the bound features are instances of or
|
||||
* subclasses of T. A ClassCastException will occur if this isn't the case.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings) {
|
||||
List<T> results = new ArrayList<T>(1);
|
||||
for ( RodBinding<T> rodBinding : rodBindings )
|
||||
results.addAll(getValues(rodBinding));
|
||||
return results;
|
||||
}
|
||||
|
||||
if (objects.size() > 1)
|
||||
logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options");
|
||||
/**
|
||||
* The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
|
||||
}
|
||||
|
||||
Object obj = objects.get(0).getUnderlyingObject();
|
||||
if (!(clazz.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
/**
|
||||
* The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
|
||||
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||
@Ensures("result != null")
|
||||
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||
List<T> results = new ArrayList<T>(1);
|
||||
for ( RodBinding<T> rodBinding : rodBindings )
|
||||
results.addAll(getValues(rodBinding, onlyAtThisLoc));
|
||||
return results;
|
||||
}
|
||||
|
||||
return (T)obj;
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> as above
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null"})
|
||||
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding) {
|
||||
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(List) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||
* @param <T> as above
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null"})
|
||||
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings) {
|
||||
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||
T val = getFirstValue(rodBinding);
|
||||
if ( val != null )
|
||||
return val;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list
|
||||
* of eligible Features and select a single element from the resulting set
|
||||
* of eligible features.
|
||||
*
|
||||
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
|
||||
* @param <T> as above
|
||||
* @param onlyAtThisLoc only Features starting at this site are considered
|
||||
* @return A random single element the eligible Features found, or null if none are bound.
|
||||
*/
|
||||
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
|
||||
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
|
||||
for ( RodBinding<T> rodBinding : rodBindings ) {
|
||||
T val = getFirstValue(rodBinding, onlyAtThisLoc);
|
||||
if ( val != null )
|
||||
return val;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Is there a binding at this site to a ROD/track with the specified name?
|
||||
*
|
||||
* @param name the name of the rod
|
||||
* @return true if it has the rod
|
||||
* @param rodBinding the rod binding we want to know about
|
||||
* @return true if any Features are bound in this tracker to rodBinding
|
||||
*/
|
||||
public boolean hasROD(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
|
||||
* at the current site, they all will be added to the list as separate elements.
|
||||
*
|
||||
* @return collection of all rods
|
||||
*/
|
||||
public Collection<GATKFeature> getAllRods() {
|
||||
List<GATKFeature> l = new ArrayList<GATKFeature>();
|
||||
for ( RODRecordList rl : map.values() ) {
|
||||
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
|
||||
l.addAll(rl);
|
||||
}
|
||||
return l;
|
||||
|
||||
@Requires({"rodBinding != null"})
|
||||
public boolean hasValues(final RodBinding rodBinding) {
|
||||
return map.containsKey(canonicalName(rodBinding.getName()));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
|
||||
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
|
||||
*
|
||||
* @return collection of all tracks
|
||||
* @return List of all tracks
|
||||
*/
|
||||
public Collection<RODRecordList> getBoundRodTracks() {
|
||||
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
|
||||
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && value.size() != 0 ) bound.add(value);
|
||||
}
|
||||
|
||||
return bound;
|
||||
public List<RODRecordList> getBoundRodTracks() {
|
||||
return new ArrayList<RODRecordList>(map.values());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker
|
||||
* The number of tracks with at least one value bound here
|
||||
* @return the number of tracks with at least one bound Feature
|
||||
*/
|
||||
public int getNBoundRodTracks() {
|
||||
return getNBoundRodTracks(null);
|
||||
public int getNTracksWithBoundFeatures() {
|
||||
return map.size();
|
||||
}
|
||||
|
||||
public int getNBoundRodTracks(final String excludeIn ) {
|
||||
final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// old style accessors
|
||||
//
|
||||
// TODO -- DELETE ME
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
int n = 0;
|
||||
for ( RODRecordList value : map.values() ) {
|
||||
if ( value != null && ! value.isEmpty() ) {
|
||||
if ( exclude == null || ! value.getName().equals(exclude) )
|
||||
n++;
|
||||
}
|
||||
}
|
||||
|
||||
return n;
|
||||
@Deprecated
|
||||
public boolean hasValues(final String name) {
|
||||
return map.containsKey(canonicalName(name));
|
||||
}
|
||||
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
|
||||
return safeGetFirst(getValues(type, name));
|
||||
}
|
||||
@Deprecated
|
||||
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
|
||||
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------------------------------------
|
||||
//
|
||||
//
|
||||
// Private utility functions
|
||||
//
|
||||
//
|
||||
// ------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal
|
||||
* system to provide access to RMDs in a structured way to the walkers.
|
||||
* Helper function for getFirst() operations that takes a list of <T> and
|
||||
* returns the first element, or null if no such element exists.
|
||||
*
|
||||
* @param name the name of the track
|
||||
* @param rod the collection of RMD data
|
||||
*/
|
||||
public void bind(final String name, RODRecordList rod) {
|
||||
//logger.debug(String.format("Binding %s to %s", name, rod));
|
||||
map.put(canonicalName(name), rod);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
|
||||
* of entries per ROD.
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param ref reference context
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
|
||||
return getAllVariantContexts(ref, null, null, false, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all of the variant contexts that start at the current location
|
||||
* @param ref
|
||||
* @param curLocation
|
||||
* @param l
|
||||
* @param <T>
|
||||
* @return
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) {
|
||||
return getAllVariantContexts(ref, null, curLocation, true, false);
|
||||
@Requires({"l != null"})
|
||||
final private <T extends Feature> T safeGetFirst(final List<T> l) {
|
||||
return l.isEmpty() ? null : l.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only
|
||||
* VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation
|
||||
* must not be null, and only records whose start position is == to curLocation.getStart() will be returned.
|
||||
* If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course,
|
||||
* this single object must pass the allowed types and start here options if provided. Note that the result
|
||||
* may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
|
||||
* the current location.
|
||||
*
|
||||
* The name of each VariantContext corresponds to the ROD name.
|
||||
*
|
||||
* @param ref reference context
|
||||
* @param allowedTypes allowed types
|
||||
* @param curLocation location
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
List<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( RODRecordList rodList : getBoundRodTracks() ) {
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the variant contexts associated with track name name
|
||||
*
|
||||
* see getVariantContexts for more information.
|
||||
*
|
||||
* @param ref ReferenceContext to enable conversion to variant context
|
||||
* @param name name
|
||||
* @param curLocation location
|
||||
* @param allowedTypes allowed types
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @param takeFirstOnly do we take the first rod only?
|
||||
* @return variant context
|
||||
*/
|
||||
// public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
// }
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
}
|
||||
|
||||
// public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
|
||||
// }
|
||||
|
||||
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
private <T extends Feature> List<T> addValues(final Collection<String> names,
|
||||
final Class<T> type,
|
||||
List<T> values,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
|
||||
values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
|
||||
if ( takeFirstOnly && ! values.isEmpty() )
|
||||
break;
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
public Collection<VariantContext> getVariantContextsByPrefix(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
|
||||
|
||||
for ( String name : names ) {
|
||||
RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match
|
||||
|
||||
if ( rodList != null )
|
||||
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
|
||||
}
|
||||
|
||||
return contexts;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not.
|
||||
* see getVariantContexts for more information.
|
||||
*
|
||||
* @param name name
|
||||
* @param curLocation location
|
||||
* @param allowedTypes allowed types
|
||||
* @param requireStartHere do we require the rod to start at this location?
|
||||
* @return variant context
|
||||
*/
|
||||
public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
|
||||
Collection<VariantContext> contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
|
||||
|
||||
if ( contexts.size() > 1 )
|
||||
throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
|
||||
else if ( contexts.size() == 0 )
|
||||
return null;
|
||||
else
|
||||
return contexts.iterator().next();
|
||||
}
|
||||
|
||||
/**
|
||||
* Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
|
||||
* null if there's no binding here.
|
||||
*
|
||||
* @param ref
|
||||
* @param name
|
||||
* @param curLocation
|
||||
* @return
|
||||
*/
|
||||
public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
|
||||
return getVariantContext(ref, name, null, curLocation, true);
|
||||
return values;
|
||||
}
|
||||
|
||||
|
||||
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
|
||||
|
||||
private <T extends Feature> List<T> addValues(final String name,
|
||||
final Class<T> type,
|
||||
List<T> values,
|
||||
final RODRecordList rodList,
|
||||
final GenomeLoc curLocation,
|
||||
final boolean requireStartHere,
|
||||
final boolean takeFirstOnly ) {
|
||||
for ( GATKFeature rec : rodList ) {
|
||||
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
|
||||
// ok, we might actually be able to turn this record in a variant context
|
||||
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
|
||||
if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
|
||||
Object obj = rec.getUnderlyingObject();
|
||||
if (!(type.isAssignableFrom(obj.getClass())))
|
||||
throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
|
||||
+ " it's of type " + obj.getClass());
|
||||
|
||||
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
|
||||
continue;
|
||||
T objT = (T)obj;
|
||||
if ( takeFirstOnly ) {
|
||||
if ( values == null )
|
||||
values = Arrays.asList(objT);
|
||||
else
|
||||
values.add(objT);
|
||||
|
||||
// now, let's decide if we want to keep it
|
||||
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
|
||||
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
|
||||
|
||||
if ( goodType && goodPos ) { // ok, we are going to keep this thing
|
||||
contexts.add(vc);
|
||||
|
||||
if ( takeFirstOnly )
|
||||
// we only want the first passing instance, so break the loop over records in rodList
|
||||
break;
|
||||
break;
|
||||
} else {
|
||||
if ( values == null )
|
||||
values = new ArrayList<T>();
|
||||
values.add(objT);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return values == null ? Collections.<T>emptyList() : values;
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
|
||||
* with the current site as a RODRecordList collection object. If no data track with specified name is available,
|
||||
* with the current site as a RODRecordList List object. If no data track with specified name is available,
|
||||
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
|
||||
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
|
||||
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
|
||||
|
|
@ -367,29 +423,16 @@ public class RefMetaDataTracker {
|
|||
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
|
||||
* regardless of the presence of "extended" RODs overlapping with that location).
|
||||
* @param name track name
|
||||
* @param requireExactMatch do we require an exact match of the rod name?
|
||||
* @return track data for the given rod
|
||||
*/
|
||||
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
|
||||
//logger.debug(String.format("Lookup %s%n", name));
|
||||
|
||||
private RODRecordList getTrackDataByName(final String name) {
|
||||
final String luName = canonicalName(name);
|
||||
RODRecordList trackData = null;
|
||||
RODRecordList l = map.get(luName);
|
||||
return l == null ? EMPTY_ROD_RECORD_LIST : l;
|
||||
}
|
||||
|
||||
if ( requireExactMatch ) {
|
||||
if ( map.containsKey(luName) )
|
||||
trackData = map.get(luName);
|
||||
} else {
|
||||
for ( Map.Entry<String, RODRecordList> datum : map.entrySet() ) {
|
||||
final String rodName = datum.getKey();
|
||||
if ( datum.getValue() != null && rodName.startsWith(luName) ) {
|
||||
if ( trackData == null ) trackData = new RODRecordListImpl(name);
|
||||
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
|
||||
((RODRecordListImpl)trackData).add(datum.getValue(), true);
|
||||
}
|
||||
}
|
||||
}
|
||||
return trackData;
|
||||
private RODRecordList getTrackDataByName(final RodBinding binding) {
|
||||
return getTrackDataByName(binding.getName());
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -398,6 +441,7 @@ public class RefMetaDataTracker {
|
|||
* @return canonical name of the rod
|
||||
*/
|
||||
private final String canonicalName(final String name) {
|
||||
// todo -- remove me after switch to RodBinding syntax
|
||||
return name.toLowerCase();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,130 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.*;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Class for representing arbitrary reference ordered data sets
|
||||
* <p/>
|
||||
* User: mdepristo
|
||||
* Date: Feb 27, 2009
|
||||
* Time: 10:47:14 AM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ReferenceOrderedDatum> {
|
||||
private String name;
|
||||
private File file = null;
|
||||
// private String fieldDelimiter;
|
||||
|
||||
/** Header object returned from the datum */
|
||||
// private Object header = null;
|
||||
|
||||
private Class<ROD> type = null; // runtime type information for object construction
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
|
||||
|
||||
/**
|
||||
* given an existing file, open it and append all the valid triplet lines to an existing list
|
||||
*
|
||||
* @param rodTripletList the list of existing triplets
|
||||
* @param filename the file to attempt to extract ROD triplets from
|
||||
*/
|
||||
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
|
||||
BufferedReader str;
|
||||
try {
|
||||
str = new BufferedReader(new FileReader(new File(filename)));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e);
|
||||
}
|
||||
String line = "NO LINES READ IN";
|
||||
try {
|
||||
while ((line = str.readLine()) != null) {
|
||||
if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
|
||||
else logger.warn("the following file line didn't parsing into a triplet -> " + line);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
|
||||
this.name = name;
|
||||
this.file = file;
|
||||
this.type = type;
|
||||
// this.header = initializeROD(name, file, type);
|
||||
// this.fieldDelimiter = newROD(name, type).delimiterRegex();
|
||||
}
|
||||
|
||||
public String getName() { return name; }
|
||||
|
||||
public File getFile() { return file; }
|
||||
|
||||
public Class<ROD> getType() { return type; }
|
||||
|
||||
/**
|
||||
* Special equals override to see if this ROD is compatible with the given
|
||||
* name and type. 'Compatible' means that this ROD has the name that's passed
|
||||
* in and its data can fit into the container specified by type.
|
||||
*
|
||||
* @param name Name to check.
|
||||
* @param type Type to check.
|
||||
*
|
||||
* @return True if these parameters imply this rod. False otherwise.
|
||||
*/
|
||||
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
|
||||
return this.name.equals(name) && type.isAssignableFrom(this.type);
|
||||
}
|
||||
|
||||
public Iterator<ReferenceOrderedDatum> iterator() {
|
||||
Iterator<ReferenceOrderedDatum> it;
|
||||
try {
|
||||
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
|
||||
it = (Iterator<ReferenceOrderedDatum>) m.invoke(null, name, file);
|
||||
} catch (java.lang.NoSuchMethodException e) {
|
||||
it = new RODRecordIterator(file,name,type);
|
||||
} catch (java.lang.NullPointerException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.SecurityException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.IllegalAccessException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.IllegalArgumentException e) {
|
||||
throw new RuntimeException(e);
|
||||
} catch (java.lang.reflect.InvocationTargetException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
// return new RODIterator<ROD>(it);
|
||||
return it;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Manipulations of all of the data
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
|
||||
final FileWriter out = new FileWriter(output);
|
||||
|
||||
for (ReferenceOrderedDatum rec : data) {
|
||||
out.write(rec.repl() + "\n");
|
||||
}
|
||||
|
||||
out.close();
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
 * An interface marking that a given Tribble codec can inspect a file and decide whether this
 * codec can specifically parse the contents of that file.
 */
public interface SelfScopingFeatureCodec {
    /**
     * This function returns true iff the File potentialInput can be parsed by this codec.
     *
     * The GATK assumes that there's never a situation where two SelfScopingFeatureCodecs
     * return true for the same file. If this occurs the GATK emits an error.
     *
     * Note this function must never throw an error. All errors should be trapped
     * and false returned.
     *
     * @param potentialInput the file to test for parsability with this codec
     * @return true if potentialInput can be parsed, false otherwise
     */
    public boolean canDecode(final File potentialInput);
}
|
||||
|
|
@ -1,12 +1,13 @@
|
|||
package org.broadinstitute.sting.gatk.refdata;
|
||||
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.dbsnp.OldDbSNPFeature;
|
||||
import org.broad.tribble.gelitext.GeliTextFeature;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -92,28 +93,89 @@ public class VariantContextAdaptors {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private static class DBSnpAdaptor implements VCAdaptor {
|
||||
private static boolean isSNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||
}
|
||||
|
||||
private static boolean isMNP(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
||||
}
|
||||
|
||||
private static boolean isInsertion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("insertion");
|
||||
}
|
||||
|
||||
private static boolean isDeletion(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("deletion");
|
||||
}
|
||||
|
||||
private static boolean isIndel(OldDbSNPFeature feature) {
|
||||
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
|
||||
}
|
||||
|
||||
public static boolean isComplexIndel(OldDbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList(feature))
|
||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlleleList(OldDbSNPFeature feature) {
|
||||
List<String> alleleList = new ArrayList<String>();
|
||||
// add ref first
|
||||
if ( feature.getStrand() == Strand.POSITIVE )
|
||||
alleleList = Arrays.asList(feature.getObserved());
|
||||
else
|
||||
for (String str : feature.getObserved())
|
||||
alleleList.add(SequenceUtil.reverseComplement(str));
|
||||
if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
|
||||
&& !alleleList.get(0).equals(feature.getNCBIRefBase()) )
|
||||
Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts non-VCF formatted dbSNP records to VariantContext.
|
||||
* @return DbSNPFeature.
|
||||
* @return OldDbSNPFeature.
|
||||
*/
|
||||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; }
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
|
||||
|
||||
@Override
|
||||
public VariantContext convert(String name, Object input, ReferenceContext ref) {
|
||||
DbSNPFeature dbsnp = (DbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
|
||||
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
|
||||
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
|
||||
return null;
|
||||
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
|
||||
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
|
||||
|
||||
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || DbSNPHelper.isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
|
||||
// add the reference allele
|
||||
List<Allele> alleles = new ArrayList<Allele>();
|
||||
alleles.add(refAllele);
|
||||
|
||||
// add all of the alt alleles
|
||||
boolean sawNullAllele = false;
|
||||
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
|
||||
boolean sawNullAllele = refAllele.isNull();
|
||||
for ( String alt : getAlternateAlleleList(dbsnp) ) {
|
||||
if ( ! Allele.acceptableAlleleBases(alt) ) {
|
||||
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
|
||||
return null;
|
||||
|
|
@ -127,14 +189,13 @@ public class VariantContextAdaptors {
|
|||
Map<String, Object> attributes = new HashMap<String, Object>();
|
||||
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
|
||||
|
||||
if ( sawNullAllele ) {
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
|
||||
}
|
||||
Collection<Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
|
||||
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
Map<String, Genotype> genotypes = null;
|
||||
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
|
||||
return vc;
|
||||
} else
|
||||
return null; // can't handle anything else
|
||||
|
|
@ -164,16 +225,6 @@ public class VariantContextAdaptors {
|
|||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
// VariantContext convert(String name, Object input) {
|
||||
// return convert(name, input, null);
|
||||
// }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
|
|
@ -237,17 +288,7 @@ public class VariantContextAdaptors {
|
|||
* @return HapMapFeature.
|
||||
*/
|
||||
@Override
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
* @param name the name of the ROD
|
||||
* @param input the Rod object, in this case a RodGeliText
|
||||
* @return a VariantContext object
|
||||
*/
|
||||
// VariantContext convert(String name, Object input) {
|
||||
// return convert(name, input, null);
|
||||
// }
|
||||
public Class<? extends Feature> getAdaptableFeatureType() { return RawHapMapFeature.class; }
|
||||
|
||||
/**
|
||||
* convert to a Variant Context, given:
|
||||
|
|
@ -261,7 +302,12 @@ public class VariantContextAdaptors {
|
|||
if ( ref == null )
|
||||
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
|
||||
|
||||
HapMapFeature hapmap = (HapMapFeature)input;
|
||||
RawHapMapFeature hapmap = (RawHapMapFeature)input;
|
||||
|
||||
int index = hapmap.getStart() - ref.getWindow().getStart();
|
||||
if ( index < 0 )
|
||||
return null; // we weren't given enough reference context to create the VariantContext
|
||||
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
|
||||
|
||||
HashSet<Allele> alleles = new HashSet<Allele>();
|
||||
Allele refSNPAllele = Allele.create(ref.getBase(), true);
|
||||
|
|
@ -271,7 +317,7 @@ public class VariantContextAdaptors {
|
|||
// use the actual alleles, if available
|
||||
if ( alleleMap != null ) {
|
||||
alleles.addAll(alleleMap.values());
|
||||
Allele deletionAllele = alleleMap.get(HapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
|
||||
Allele deletionAllele = alleleMap.get(RawHapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
|
||||
if ( deletionAllele != null && deletionAllele.isReference() )
|
||||
deletionLength = deletionAllele.length();
|
||||
} else {
|
||||
|
|
@ -321,7 +367,7 @@ public class VariantContextAdaptors {
|
|||
long end = hapmap.getEnd();
|
||||
if ( deletionLength > 0 )
|
||||
end += deletionLength;
|
||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
|
||||
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
|
||||
return vc;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,193 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.exception.CodecLineParsingException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.StringTokenizer;
|
||||
|
||||
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
|
||||
|
||||
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
||||
|
||||
public static final String DELIMITER = "\t";
|
||||
|
||||
private ArrayList<String> header;
|
||||
|
||||
/**
|
||||
* The parser to use when resolving genome-wide locations.
|
||||
*/
|
||||
private GenomeLocParser genomeLocParser;
|
||||
|
||||
/**
|
||||
* Set the parser to use when resolving genetic data.
|
||||
* @param genomeLocParser The supplied parser.
|
||||
*/
|
||||
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses the header.
|
||||
*
|
||||
* @param reader
|
||||
*
|
||||
* @return The # of header lines for this file.
|
||||
*/
|
||||
public Object readHeader(LineReader reader)
|
||||
{
|
||||
int[] lineCounter = new int[1];
|
||||
try {
|
||||
header = readHeader(reader, lineCounter);
|
||||
} catch(IOException e) {
|
||||
throw new IllegalArgumentException("Unable to read from file.", e);
|
||||
}
|
||||
return header;
|
||||
}
|
||||
|
||||
public Class<AnnotatorInputTableFeature> getFeatureType() {
|
||||
return AnnotatorInputTableFeature.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Feature decodeLoc(String line) {
|
||||
StringTokenizer st = new StringTokenizer(line, DELIMITER);
|
||||
if ( st.countTokens() < 1 )
|
||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
||||
|
||||
GenomeLoc loc;
|
||||
String chr = st.nextToken();
|
||||
if ( chr.indexOf(":") != -1 ) {
|
||||
loc = genomeLocParser.parseGenomeLoc(chr);
|
||||
} else {
|
||||
if ( st.countTokens() < 3 )
|
||||
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
|
||||
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
|
||||
}
|
||||
return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Parses the line into an AnnotatorInputTableFeature object.
|
||||
*
|
||||
* @param line
|
||||
*/
|
||||
public AnnotatorInputTableFeature decode(String line) {
|
||||
final ArrayList<String> header = this.header; //optimization
|
||||
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
|
||||
|
||||
if ( values.size() != header.size()) {
|
||||
throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
|
||||
}
|
||||
|
||||
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
|
||||
for ( int i = 0; i < header.size(); i++ ) {
|
||||
feature.putColumnValue(header.get(i), values.get(i));
|
||||
}
|
||||
|
||||
GenomeLoc loc;
|
||||
if ( values.get(0).indexOf(":") != -1 )
|
||||
loc = genomeLocParser.parseGenomeLoc(values.get(0));
|
||||
else
|
||||
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
|
||||
|
||||
//parse the location
|
||||
feature.setChr(loc.getContig());
|
||||
feature.setStart((int)loc.getStart());
|
||||
feature.setEnd((int)loc.getStop());
|
||||
|
||||
return feature;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the header.
|
||||
* @param source
|
||||
* @return
|
||||
* @throws IOException
|
||||
*/
|
||||
public static ArrayList<String> readHeader(final File source) throws IOException {
|
||||
FileInputStream is = new FileInputStream(source);
|
||||
try {
|
||||
return readHeader(new AsciiLineReader(is), null);
|
||||
} finally {
|
||||
is.close();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the header, and also sets the 2nd arg to the number of lines in the header.
|
||||
* @param source
|
||||
* @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
|
||||
* @return The header fields.
|
||||
* @throws IOException
|
||||
*/
|
||||
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
|
||||
|
||||
ArrayList<String> header = null;
|
||||
int numLines = 0;
|
||||
|
||||
//find the 1st line that's non-empty and not a comment
|
||||
String line = null;
|
||||
while( (line = source.readLine()) != null ) {
|
||||
numLines++;
|
||||
if ( line.trim().isEmpty() || line.startsWith("#") ) {
|
||||
continue;
|
||||
}
|
||||
|
||||
//parse the header
|
||||
header = Utils.split(line, DELIMITER);
|
||||
break;
|
||||
}
|
||||
|
||||
// check that we found the header
|
||||
if ( header == null ) {
|
||||
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
|
||||
}
|
||||
|
||||
if(lineCounter != null) {
|
||||
lineCounter[0] = numLines;
|
||||
}
|
||||
|
||||
logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
|
||||
|
||||
return header;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,158 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.features.annotator;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* This class represents a single record in an AnnotatorInputTable.
|
||||
*/
|
||||
public class AnnotatorInputTableFeature implements Feature {
|
||||
|
||||
private ArrayList<String> columnNames;
|
||||
private HashMap<String, String> columnValues; //maps colum names to column values
|
||||
|
||||
private String chr;
|
||||
private int start;
|
||||
private int end;
|
||||
private String strRep = null;
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param chr The chromosome name.
|
||||
* @param start The start position
|
||||
* @param end The end position
|
||||
*/
|
||||
public AnnotatorInputTableFeature(String chr, int start, int end) {
|
||||
this.chr = chr;
|
||||
this.start = start;
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Constructor.
|
||||
* @param columnNames The column names as parsed out of the file header.
|
||||
*/
|
||||
public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
|
||||
this.columnNames = columnNames;
|
||||
this.columnValues = new HashMap<String, String>();
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* @return the list of column names from the file header.
|
||||
*/
|
||||
public ArrayList<String> getHeader() {
|
||||
return columnNames;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the value of the given column.
|
||||
*
|
||||
* @param columnName The column name as it appears in the file header.
|
||||
* @return The value
|
||||
*/
|
||||
public String getColumnValue(final String columnName) {
|
||||
return columnValues.get(columnName);
|
||||
}
|
||||
|
||||
|
||||
public boolean containsColumnName(final String columnName) {
|
||||
return columnValues.containsKey(columnName);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Sets the value for the given column.
|
||||
*
|
||||
* @param columnName The column name as it appears in the file header.
|
||||
* @param value The value
|
||||
* @return The existing value associated with the columnName, if there is one.
|
||||
*/
|
||||
protected String putColumnValue(final String columnName, final String value) {
|
||||
return columnValues.put(columnName, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all values in this line, hashed by their column names.
|
||||
*/
|
||||
public Map<String,String> getColumnValues() {
|
||||
return Collections.unmodifiableMap(columnValues);
|
||||
}
|
||||
|
||||
|
||||
public String getChr() {
|
||||
return chr;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return start;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return end;
|
||||
}
|
||||
|
||||
protected void setChr(String chr) {
|
||||
this.chr = chr;
|
||||
}
|
||||
|
||||
protected void setStart(int start) {
|
||||
this.start = start;
|
||||
}
|
||||
|
||||
protected void setEnd(int end) {
|
||||
this.end = end;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
if ( strRep == null ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for(String columnName : columnNames ) {
|
||||
if ( sb.length() == 0 )
|
||||
sb.append("[");
|
||||
else
|
||||
sb.append(", ");
|
||||
sb.append(columnName + "=" + columnValues.get(columnName));
|
||||
}
|
||||
sb.append("]");
|
||||
|
||||
strRep = sb.toString();
|
||||
}
|
||||
|
||||
return strRep;
|
||||
}
|
||||
}
|
||||
|
|
@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
|
||||
|
|
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
|
|||
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
||||
|
||||
// find the types available to the track builders
|
||||
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
|
||||
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
|
||||
|
||||
// check that the type is valid
|
||||
if (!typeMapping.containsKey(inputFileType))
|
||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
|
||||
if (descriptor == null)
|
||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
|
||||
|
||||
// create the codec
|
||||
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
|
||||
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
|
||||
|
||||
// check if it's a reference dependent feature codec
|
||||
if (codec instanceof ReferenceDependentFeatureCodec)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,255 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.NameAwareCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.help.GATKDocUtils;
|
||||
import org.broadinstitute.sting.utils.help.HelpUtils;
|
||||
|
||||
import javax.mail.Header;
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
* Class for managing Tribble Feature readers available to the GATK. The features
|
||||
* are dynamically determined via a PluginManager. This class provides convenient
|
||||
* getter methods for obtaining FeatureDescriptor objects that collect all of the
|
||||
* useful information about the Tribble Codec, Feature, and name in one place.
|
||||
*
|
||||
* @author depristo
|
||||
*/
|
||||
public class FeatureManager {
|
||||
public static class FeatureDescriptor implements Comparable<FeatureDescriptor> {
|
||||
final String name;
|
||||
final FeatureCodec codec;
|
||||
|
||||
public FeatureDescriptor(final String name, final FeatureCodec codec) {
|
||||
this.name = name;
|
||||
this.codec = codec;
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return name;
|
||||
}
|
||||
public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
|
||||
public FeatureCodec getCodec() {
|
||||
return codec;
|
||||
}
|
||||
public Class getCodecClass() { return codec.getClass(); }
|
||||
public Class getFeatureClass() { return codec.getFeatureType(); }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
|
||||
getName(), getCodecClass().getName(), getFeatureClass().getName());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(FeatureDescriptor o) {
|
||||
return getName().compareTo(o.getName());
|
||||
}
|
||||
}
|
||||
|
||||
private final PluginManager<FeatureCodec> pluginManager;
|
||||
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
|
||||
|
||||
/**
|
||||
* Construct a FeatureManager
|
||||
*/
|
||||
public FeatureManager() {
|
||||
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
|
||||
|
||||
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
|
||||
FeatureCodec codec = pluginManager.createByName(rawName);
|
||||
String name = rawName.toUpperCase();
|
||||
FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
|
||||
featureDescriptors.add(featureDescriptor);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
|
||||
*
|
||||
* @param codecClass
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("codecClass != null")
|
||||
public FeatureDescriptor getByCodec(Class codecClass) {
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getCodecClass().equals(codecClass) )
|
||||
return descriptor;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of FeatureDescriptors that emit records of type featureClass
|
||||
*
|
||||
* @param featureClass
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("featureClass != null")
|
||||
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
|
||||
Set<FeatureDescriptor> consistentDescriptors = new TreeSet<FeatureDescriptor>();
|
||||
|
||||
if (featureClass == null)
|
||||
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
||||
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors ) {
|
||||
if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
|
||||
consistentDescriptors.add(descriptor);
|
||||
}
|
||||
return consistentDescriptors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the FeatureDescriptor with getName().equals(name)
|
||||
*
|
||||
* @param name
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires("name != null")
|
||||
public FeatureDescriptor getByName(String name) {
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getName().equalsIgnoreCase(name) )
|
||||
return descriptor;
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the FeatureDescriptor that can read the contexts of File file, is one can be determined
|
||||
*
|
||||
* @param file
|
||||
* @return A FeatureDescriptor or null if none is found
|
||||
*/
|
||||
@Requires({"file != null", "file.isFile()", "file.canRead()"})
|
||||
public FeatureDescriptor getByFiletype(File file) {
|
||||
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
|
||||
for ( FeatureDescriptor descriptor : featureDescriptors )
|
||||
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
|
||||
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
|
||||
canParse.add(descriptor);
|
||||
}
|
||||
}
|
||||
|
||||
if ( canParse.size() == 0 )
|
||||
return null;
|
||||
else if ( canParse.size() > 1 )
|
||||
throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
|
||||
else
|
||||
return canParse.get(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
|
||||
* @param triplet
|
||||
* @return
|
||||
*/
|
||||
@Requires("triplet != null")
|
||||
public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
|
||||
return getByName(triplet.getType());
|
||||
}
|
||||
|
||||
/**
|
||||
* @return all of the FeatureDescriptors available to the GATK. Never null
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public Collection<FeatureDescriptor> getFeatureDescriptors() {
|
||||
return Collections.unmodifiableCollection(featureDescriptors);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public String userFriendlyListOfAvailableFeatures() {
|
||||
return userFriendlyListOfAvailableFeatures(Feature.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a list of the available tribble track names (vcf,dbsnp,etc) that we can load
|
||||
* restricted to only Codecs producting Features consistent with the requiredFeatureType
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
|
||||
final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation";
|
||||
|
||||
int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length();
|
||||
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
|
||||
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
|
||||
maxNameLen = Math.max(maxNameLen, descriptor.getName().length());
|
||||
maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length());
|
||||
}
|
||||
}
|
||||
|
||||
StringBuilder docs = new StringBuilder();
|
||||
String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n";
|
||||
docs.append(String.format(format, nameHeader, featureHeader, docHeader));
|
||||
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
|
||||
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
|
||||
String oneDoc = String.format(format,
|
||||
descriptor.getName(),
|
||||
descriptor.getSimpleFeatureName(),
|
||||
GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()));
|
||||
docs.append(oneDoc);
|
||||
}
|
||||
}
|
||||
|
||||
return docs.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new FeatureCodec of the type described in descriptor, assigning it the
|
||||
* name (if possible) and providing it the genomeLocParser (where necessary)
|
||||
*
|
||||
* @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
|
||||
* @param name the name to assign this codec
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
|
||||
@Ensures("result != null")
|
||||
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
|
||||
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
if ( codex instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
return codex;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* <p/>
|
||||
* Interface QueryableTrack
|
||||
* <p/>
|
||||
* a decorator interface for tracks that are queryable
|
||||
*/
|
||||
public interface QueryableTrack {
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
|
||||
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
|
||||
public void close();
|
||||
}
|
||||
|
|
@ -25,8 +25,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
|
|||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broad.tribble.iterators.CloseableTribbleIterator;
|
||||
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
|
@ -45,10 +49,10 @@ import java.io.IOException;
|
|||
* the basics of what a reference metadata track must contain.
|
||||
*/
|
||||
public class RMDTrack {
|
||||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||
|
||||
// the basics of a track:
|
||||
private final Class type; // our type
|
||||
private final Class recordType; // the underlying records that are produced by this track
|
||||
private final String name; // the name
|
||||
private final File file; // the associated file we create the reader from
|
||||
|
||||
|
|
@ -90,7 +94,6 @@ public class RMDTrack {
|
|||
*/
|
||||
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
|
||||
this.type = type;
|
||||
this.recordType = codec.getFeatureType();
|
||||
this.name = name;
|
||||
this.file = file;
|
||||
this.reader = reader;
|
||||
|
|
@ -112,19 +115,10 @@ public class RMDTrack {
|
|||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
||||
}
|
||||
|
||||
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
|
||||
CloseableTribbleIterator<Feature> iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop());
|
||||
if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||
logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog());
|
||||
return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName());
|
||||
}
|
||||
|
||||
public void close() {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2010 The Broad Institute
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
|
|
@ -12,37 +12,36 @@
|
|||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
||||
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.*;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.source.BasicFeatureSource;
|
||||
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broadinstitute.sting.commandline.Tags;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -53,7 +52,10 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
|||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
|
||||
|
|
@ -67,17 +69,16 @@ import java.util.*;
|
|||
* that gets iterators from the FeatureReader using Tribble.
|
||||
*
|
||||
*/
|
||||
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
||||
public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
||||
/**
|
||||
* our log, which we use to capture anything from this class
|
||||
*/
|
||||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
||||
|
||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||
|
||||
private Map<String, Class> classes = null;
|
||||
|
||||
// private sequence dictionary we use to set our tracks with
|
||||
private SAMSequenceDictionary dict = null;
|
||||
|
||||
|
|
@ -91,6 +92,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
*/
|
||||
private ValidationExclusion.TYPE validationExclusionType;
|
||||
|
||||
FeatureManager featureManager;
|
||||
|
||||
/**
|
||||
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
|
||||
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
|
||||
|
|
@ -102,29 +105,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
public RMDTrackBuilder(SAMSequenceDictionary dict,
|
||||
GenomeLocParser genomeLocParser,
|
||||
ValidationExclusion.TYPE validationExclusionType) {
|
||||
super(FeatureCodec.class, "Codecs", "Codec");
|
||||
this.dict = dict;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.validationExclusionType = validationExclusionType;
|
||||
|
||||
classes = new HashMap<String, Class>();
|
||||
for (String name: this.getPluginsByName().keySet()) {
|
||||
classes.put(name.toUpperCase(), getPluginsByName().get(name));
|
||||
} }
|
||||
|
||||
/** @return a list of all available track types we currently have access to create */
|
||||
public Map<String, Class> getAvailableTrackNamesAndTypes() {
|
||||
return Collections.unmodifiableMap(classes);
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
featureManager = new FeatureManager();
|
||||
}
|
||||
|
||||
/** @return a list of all available track record types we currently have access to create */
|
||||
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
|
||||
HashMap classToRecord = new HashMap<String, Class>();
|
||||
for (String name: this.getPluginsByName().keySet()) {
|
||||
FeatureCodec codec = this.createByName(name);
|
||||
classToRecord.put(name, codec.getFeatureType());
|
||||
}
|
||||
return classToRecord;
|
||||
public FeatureManager getFeatureManager() {
|
||||
return featureManager;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -133,45 +121,38 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* @param fileDescriptor a description of the type of track to build.
|
||||
*
|
||||
* @return an instance of the track
|
||||
* @throws RMDTrackCreationException
|
||||
* if we don't know of the target class or we couldn't create it
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException {
|
||||
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
|
||||
String name = fileDescriptor.getName();
|
||||
File inputFile = new File(fileDescriptor.getFile());
|
||||
|
||||
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
|
||||
if (featureCodecClass == null)
|
||||
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
|
||||
if (descriptor == null)
|
||||
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
|
||||
|
||||
// return a feature reader track
|
||||
Pair<FeatureSource, SAMSequenceDictionary> pair;
|
||||
if (inputFile.getAbsolutePath().endsWith(".gz"))
|
||||
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
|
||||
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
|
||||
else
|
||||
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
|
||||
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
|
||||
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
|
||||
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
|
||||
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
|
||||
* @param targetClass Type of Tribble class to build.
|
||||
* @param codecClass Type of Tribble codec class to build.
|
||||
* @param inputFile Input file type to use.
|
||||
* @return An RMDTrack, suitable for accessing reference metadata.
|
||||
*/
|
||||
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
|
||||
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
|
||||
String typeName = null;
|
||||
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
|
||||
if(trackType.getValue().equals(targetClass))
|
||||
typeName = trackType.getKey();
|
||||
}
|
||||
public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
|
||||
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||
|
||||
if(typeName == null)
|
||||
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
|
||||
if (descriptor == null)
|
||||
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
||||
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -179,16 +160,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
* reader of the appropriate type will figure out what the right index type is, and determine if it
|
||||
* exists.
|
||||
*
|
||||
* @param targetClass the codec class type
|
||||
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||
* @param name the name of the track
|
||||
* @param inputFile the file to load
|
||||
* @return a feature reader implementation
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
|
||||
// we might not know the index type, try loading with the default reader constructor
|
||||
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
|
||||
try {
|
||||
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
|
||||
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
|
||||
} catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage(), e);
|
||||
}
|
||||
|
|
@ -196,28 +177,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
/**
|
||||
* add a name to the codec, if it takes one
|
||||
* @param targetClass the class to create a codec for
|
||||
* @param descriptor the class to create a codec for
|
||||
* @param name the name to assign this codec
|
||||
* @return the feature codec itself
|
||||
*/
|
||||
public FeatureCodec createCodec(Class targetClass, String name) {
|
||||
FeatureCodec codex = this.createByType(targetClass);
|
||||
if ( codex instanceof NameAwareCodec )
|
||||
((NameAwareCodec)codex).setName(name);
|
||||
if(codex instanceof ReferenceDependentFeatureCodec)
|
||||
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
|
||||
return codex;
|
||||
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
|
||||
return featureManager.createCodec(descriptor, name, genomeLocParser);
|
||||
}
|
||||
|
||||
/**
|
||||
* create a feature source object given:
|
||||
* @param targetClass the target class
|
||||
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
|
||||
* @param name the name of the codec
|
||||
* @param inputFile the tribble file to parse
|
||||
* @param storageType How the RMD is streamed into the input file.
|
||||
* @return the input file as a FeatureReader
|
||||
*/
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
|
||||
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
|
||||
String name,
|
||||
File inputFile,
|
||||
RMDStorageType storageType) {
|
||||
// Feature source and sequence dictionary to use as the ultimate reference
|
||||
FeatureSource featureSource = null;
|
||||
SAMSequenceDictionary sequenceDictionary = null;
|
||||
|
|
@ -227,7 +206,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
|
||||
if(canBeIndexed) {
|
||||
try {
|
||||
Index index = loadIndex(inputFile, createCodec(targetClass, name));
|
||||
Index index = loadIndex(inputFile, createCodec(descriptor, name));
|
||||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch (ReviewedStingException e) { }
|
||||
|
||||
|
|
@ -240,7 +219,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
|
||||
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||
featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
|
||||
else
|
||||
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
|
||||
}
|
||||
catch (TribbleException e) {
|
||||
throw new UserException(e.getMessage());
|
||||
|
|
@ -250,7 +232,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
}
|
||||
}
|
||||
else {
|
||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false);
|
||||
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
|
||||
}
|
||||
|
||||
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
|
||||
|
|
@ -385,22 +367,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
|
|||
return idx;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a collection of track names that match the record type.
|
||||
* @param trackRecordType the record type specified in the @RMD annotation
|
||||
* @return a collection of available track record type names that match the record type
|
||||
*/
|
||||
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
|
||||
Set<String> names = new TreeSet<String>();
|
||||
if (trackRecordType == null)
|
||||
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
|
||||
|
||||
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
|
||||
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
|
||||
names.add(availableTrackRecordType.getKey());
|
||||
}
|
||||
return names;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// static functions to work with the sequence dictionaries of indexes
|
||||
|
|
@ -1,45 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class RMDTrackCreationException
|
||||
*
|
||||
* if we fail for some reason to make a track, throw this exception
|
||||
*/
|
||||
public class RMDTrackCreationException extends ReviewedStingException {
|
||||
public RMDTrackCreationException(String msg) {
|
||||
super(msg);
|
||||
}
|
||||
|
||||
public RMDTrackCreationException(String message, Throwable throwable) {
|
||||
super(message, throwable);
|
||||
}
|
||||
}
|
||||
|
|
@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
|||
|
||||
public abstract GenomeLoc getLocation();
|
||||
|
||||
// TODO: this should be a Feature
|
||||
public abstract Object getUnderlyingObject();
|
||||
|
||||
/**
|
||||
|
|
@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
|
|||
return feature.getEnd();
|
||||
}
|
||||
|
||||
// TODO: this should be a Feature, actually
|
||||
public Object getUnderlyingObject() {
|
||||
return feature;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* wrapping a old style rod into the new GATK feature style
|
||||
*/
|
||||
public static class RODGATKFeature extends GATKFeature {
|
||||
|
||||
// our data
|
||||
private ReferenceOrderedDatum datum;
|
||||
|
||||
public RODGATKFeature(ReferenceOrderedDatum datum) {
|
||||
super(datum.getName());
|
||||
this.datum = datum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public GenomeLoc getLocation() {
|
||||
return datum.getLocation();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getUnderlyingObject() {
|
||||
return datum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getChr() {
|
||||
return datum.getLocation().getContig();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getStart() {
|
||||
return (int)datum.getLocation().getStart();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getEnd() {
|
||||
return (int)datum.getLocation().getStop();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,65 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2010. The Broad Institute
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import net.sf.samtools.util.CloseableIterator;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class GATKFeatureIterator
|
||||
*
|
||||
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
|
||||
*/
|
||||
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
|
||||
private final Iterator<ReferenceOrderedDatum> iter;
|
||||
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
|
||||
this.iter = iter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasNext() {
|
||||
return iter.hasNext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKFeature next() {
|
||||
return new GATKFeature.RODGATKFeature(iter.next());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove() {
|
||||
throw new UnsupportedOperationException("Remove not supported");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
// do nothing, our underlying iterator doesn't support this
|
||||
}
|
||||
}
|
||||
|
|
@ -1,190 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
|
||||
|
||||
import net.sf.samtools.util.SequenceUtil;
|
||||
import org.broad.tribble.annotation.Strand;
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* this class contains static helper methods for DbSNP
|
||||
*/
|
||||
public class DbSNPHelper {
|
||||
public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";
|
||||
|
||||
private DbSNPHelper() {} // don't make a DbSNPHelper
|
||||
|
||||
public static DbSNPFeature getFirstRealSNP(List<Object> dbsnpList) {
|
||||
if (dbsnpList == null)
|
||||
return null;
|
||||
|
||||
DbSNPFeature dbsnp = null;
|
||||
for (Object d : dbsnpList) {
|
||||
if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) {
|
||||
dbsnp = (DbSNPFeature) d;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return dbsnp;
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealSNP(List<Object> featureList) {
|
||||
if (featureList == null)
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( Object d : featureList ) {
|
||||
if ( d instanceof DbSNPFeature ) {
|
||||
if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) {
|
||||
rsID = ((DbSNPFeature)d).getRsID();
|
||||
break;
|
||||
}
|
||||
} else if ( d instanceof VariantContext) {
|
||||
if ( ((VariantContext)d).isSNP() ) {
|
||||
rsID = ((VariantContext)d).getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
public static String rsIDOfFirstRealIndel(List<Object> featureList) {
|
||||
if (featureList == null)
|
||||
return null;
|
||||
|
||||
String rsID = null;
|
||||
for ( Object d : featureList ) {
|
||||
if ( d instanceof DbSNPFeature ) {
|
||||
if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) {
|
||||
rsID = ((DbSNPFeature)d).getRsID();
|
||||
break;
|
||||
}
|
||||
} else if ( d instanceof VariantContext) {
|
||||
if ( ((VariantContext)d).isIndel() ) {
|
||||
rsID = ((VariantContext)d).getID();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rsID;
|
||||
}
|
||||
|
||||
/**
|
||||
* get the -1 * (log 10 of the error value)
|
||||
*
|
||||
* @return the log based error estimate
|
||||
*/
|
||||
public static double getNegLog10PError(DbSNPFeature feature) {
|
||||
return 4; // -log10(0.0001)
|
||||
}
|
||||
|
||||
//
|
||||
// What kind of variant are we?
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
public static boolean isSNP(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
|
||||
}
|
||||
|
||||
public static boolean isMNP(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
|
||||
}
|
||||
|
||||
public static String toMediumString(DbSNPFeature feature) {
|
||||
String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved()));
|
||||
if (isSNP(feature)) s += ":SNP";
|
||||
if (isIndel(feature)) s += ":Indel";
|
||||
if (isHapmap(feature)) s += ":Hapmap";
|
||||
if (is2Hit2Allele(feature)) s += ":2Hit";
|
||||
return s;
|
||||
}
|
||||
|
||||
public static boolean isInsertion(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("insertion");
|
||||
}
|
||||
|
||||
public static boolean isDeletion(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("deletion");
|
||||
}
|
||||
|
||||
public static boolean isIndel(DbSNPFeature feature) {
|
||||
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature);
|
||||
}
|
||||
|
||||
public static boolean isComplexIndel(DbSNPFeature feature) {
|
||||
return feature.getVariantType().contains("in-del");
|
||||
}
|
||||
|
||||
public static boolean isHapmap(DbSNPFeature feature) {
|
||||
return feature.getValidationStatus().contains("by-hapmap");
|
||||
}
|
||||
|
||||
public static boolean is2Hit2Allele(DbSNPFeature feature) {
|
||||
return feature.getValidationStatus().contains("by-2hit-2allele");
|
||||
}
|
||||
|
||||
public static boolean is1000genomes(DbSNPFeature feature) {
|
||||
return feature.getValidationStatus().contains("by-1000genomes");
|
||||
}
|
||||
|
||||
public static boolean isMQ1(DbSNPFeature feature) {
|
||||
return feature.getWeight() == 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alternate alleles. This method should return all the alleles present at the location,
|
||||
* NOT including the reference base. This is returned as a string list with no guarantee ordering
|
||||
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
|
||||
* frequency).
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
|
||||
List<String> ret = new ArrayList<String>();
|
||||
for (String allele : getAlleleList(feature))
|
||||
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
|
||||
return ret;
|
||||
}
|
||||
|
||||
public static boolean onFwdStrand(DbSNPFeature feature) {
|
||||
return feature.getStrand() == Strand.POSITIVE;
|
||||
}
|
||||
|
||||
public static String getReference(DbSNPFeature feature) {
|
||||
return feature.getNCBIRefBase();
|
||||
}
|
||||
|
||||
public static String toSimpleString(DbSNPFeature feature) {
|
||||
return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-");
|
||||
}
|
||||
|
||||
/**
|
||||
* gets the alleles. This method should return all the alleles present at the location,
|
||||
* including the reference base. The first allele should always be the reference allele, followed
|
||||
* by an unordered list of alternate alleles.
|
||||
*
|
||||
* @return an alternate allele list
|
||||
*/
|
||||
public static List<String> getAlleleList(DbSNPFeature feature) {
|
||||
List<String> alleleList = new ArrayList<String>();
|
||||
// add ref first
|
||||
if ( onFwdStrand(feature) )
|
||||
alleleList = Arrays.asList(feature.getObserved());
|
||||
else
|
||||
for (String str : feature.getObserved())
|
||||
alleleList.add(SequenceUtil.reverseComplement(str));
|
||||
if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) )
|
||||
Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0);
|
||||
|
||||
return alleleList;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,21 +1,25 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.Collection;
|
||||
import java.util.List;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Container class for GATK report tables
|
||||
*/
|
||||
public class GATKReport {
|
||||
private TreeMap<String, GATKReportTable> tables;
|
||||
public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
|
||||
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
|
||||
|
||||
/**
|
||||
* Create a new, empty GATKReport.
|
||||
*/
|
||||
public GATKReport() {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -23,7 +27,7 @@ public class GATKReport {
|
|||
* @param filename the path to the file to load
|
||||
*/
|
||||
public GATKReport(String filename) {
|
||||
loadReport(new File(filename));
|
||||
this(new File(filename));
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -31,7 +35,6 @@ public class GATKReport {
|
|||
* @param file the file to load
|
||||
*/
|
||||
public GATKReport(File file) {
|
||||
tables = new TreeMap<String, GATKReportTable>();
|
||||
loadReport(file);
|
||||
}
|
||||
|
||||
|
|
@ -46,11 +49,17 @@ public class GATKReport {
|
|||
GATKReportTable table = null;
|
||||
String[] header = null;
|
||||
int id = 0;
|
||||
GATKReportVersion version = null;
|
||||
List<Integer> columnStarts = null;
|
||||
|
||||
String line;
|
||||
while ( (line = reader.readLine()) != null ) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
|
||||
|
||||
if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
|
||||
|
||||
version = GATKReportVersion.fromHeader(line);
|
||||
|
||||
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
|
||||
String[] pieces = line.split(" : ");
|
||||
|
||||
String tableName = pieces[0];
|
||||
|
|
@ -58,14 +67,35 @@ public class GATKReport {
|
|||
|
||||
addTable(tableName, tableDesc);
|
||||
table = getTable(tableName);
|
||||
table.setVersion(version);
|
||||
|
||||
header = null;
|
||||
} else if ( line.isEmpty() ) {
|
||||
columnStarts = null;
|
||||
} else if ( line.trim().isEmpty() ) {
|
||||
// do nothing
|
||||
} else {
|
||||
if (table != null) {
|
||||
|
||||
String[] splitLine;
|
||||
|
||||
switch (version) {
|
||||
case V0_1:
|
||||
splitLine = TextFormattingUtils.splitWhiteSpace(line);
|
||||
break;
|
||||
|
||||
case V0_2:
|
||||
if (header == null) {
|
||||
columnStarts = TextFormattingUtils.getWordStarts(line);
|
||||
}
|
||||
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
|
||||
}
|
||||
|
||||
if (header == null) {
|
||||
header = line.split("\\s+");
|
||||
header = splitLine;
|
||||
|
||||
table.addPrimaryKey("id", false);
|
||||
|
||||
|
|
@ -75,10 +105,8 @@ public class GATKReport {
|
|||
|
||||
id = 0;
|
||||
} else {
|
||||
String[] entries = line.split("\\s+");
|
||||
|
||||
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
|
||||
table.set(id, header[columnIndex], entries[columnIndex]);
|
||||
table.set(id, header[columnIndex], splitLine[columnIndex]);
|
||||
}
|
||||
|
||||
id++;
|
||||
|
|
@ -125,7 +153,10 @@ public class GATKReport {
|
|||
* @return the table object
|
||||
*/
|
||||
public GATKReportTable getTable(String tableName) {
|
||||
return tables.get(tableName);
|
||||
GATKReportTable table = tables.get(tableName);
|
||||
if (table == null)
|
||||
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
|
||||
return table;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -140,4 +171,8 @@ public class GATKReport {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
public Collection<GATKReportTable> getTables() {
|
||||
return tables.values();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
|
||||
* values) in the table.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be set
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public Object getWithoutSideEffects(Object primaryKey) {
|
||||
private Object getWithoutSideEffects(Object primaryKey) {
|
||||
if (!this.containsKey(primaryKey)) {
|
||||
return defaultValue;
|
||||
}
|
||||
|
|
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
return this.get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an object from the column, but if it doesn't exist, return the default value.
|
||||
*
|
||||
* @param primaryKey the primary key position in the column that should be retrieved
|
||||
* @return the string value at the specified position in the column, or the default value if the element is not set
|
||||
*/
|
||||
public String getStringValue(Object primaryKey) {
|
||||
return toString(getWithoutSideEffects(primaryKey));
|
||||
}
|
||||
|
||||
/**
|
||||
* Return the displayable property of the column. If true, the column will be displayed in the final output.
|
||||
* If not, printing will be suppressed for the contents of the table.
|
||||
|
|
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
for (Object obj : this.values()) {
|
||||
if (obj != null) {
|
||||
int width = obj.toString().length();
|
||||
int width = toString(obj).length();
|
||||
|
||||
if (width > maxWidth) {
|
||||
maxWidth = width;
|
||||
|
|
@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
|
|||
|
||||
return maxWidth;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a string version of the values.
|
||||
* @param obj The object to convert to a string
|
||||
* @return The string representation of the column
|
||||
*/
|
||||
private static String toString(Object obj) {
|
||||
String value;
|
||||
if (obj == null) {
|
||||
value = "null";
|
||||
} else if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
public String getColumnName() {
|
||||
return columnName;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,55 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Tracks a linked list of GATKReportColumn in order by name.
|
||||
*/
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||
private List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
* Returns the column by index
|
||||
* @param i the index
|
||||
* @return The column
|
||||
*/
|
||||
public GATKReportColumn getByIndex(int i) {
|
||||
return get(columnNames.get(i));
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKReportColumn remove(Object key) {
|
||||
columnNames.remove(key);
|
||||
return super.remove(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public GATKReportColumn put(String key, GATKReportColumn value) {
|
||||
columnNames.add(key);
|
||||
return super.put(key, value);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,83 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
public class GATKReportParser {
|
||||
private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();
|
||||
|
||||
public void parse(File file) throws IOException {
|
||||
InputStream stream = FileUtils.openInputStream(file);
|
||||
try {
|
||||
parse(stream);
|
||||
} finally {
|
||||
IOUtils.closeQuietly(stream);
|
||||
}
|
||||
}
|
||||
|
||||
public void parse(InputStream input) throws IOException {
|
||||
GATKReportTableParser table = null;
|
||||
|
||||
for (String line: new XReadLines(input)) {
|
||||
if (line.startsWith("##:GATKReport.v0.1 ")) {
|
||||
table = newTableParser(line);
|
||||
tables.add(table);
|
||||
table.parse(line);
|
||||
} else if (table != null) {
|
||||
if (line.trim().length() == 0)
|
||||
table = null;
|
||||
else
|
||||
table.parse(line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String[] key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String tableName, String key, String column) {
|
||||
for (GATKReportTableParser table: tables)
|
||||
if (table.getTableName().equals(tableName))
|
||||
return table.getValue(key, column);
|
||||
return null;
|
||||
}
|
||||
|
||||
private GATKReportTableParser newTableParser(String header) {
|
||||
return new GATKReportTableParser();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.ObjectUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.PrintStream;
|
||||
|
|
@ -88,17 +89,22 @@ import java.util.regex.Pattern;
|
|||
* but at least the prototype contained herein works.
|
||||
*
|
||||
* @author Kiran Garimella
|
||||
* @author Khalid Shakir
|
||||
*/
|
||||
public class GATKReportTable {
|
||||
/** REGEX that matches any table with an invalid name */
|
||||
public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
|
||||
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
|
||||
private String tableName;
|
||||
private String tableDescription;
|
||||
private GATKReportVersion version = LATEST_REPORT_VERSION;
|
||||
|
||||
private String primaryKeyName;
|
||||
private Collection<Object> primaryKeyColumn;
|
||||
private boolean primaryKeyDisplay;
|
||||
boolean sortByPrimaryKey = true;
|
||||
private boolean sortByPrimaryKey = true;
|
||||
|
||||
private LinkedHashMap<String, GATKReportColumn> columns;
|
||||
private GATKReportColumns columns;
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
|
|
@ -107,12 +113,25 @@ public class GATKReportTable {
|
|||
* @return true if the name is valid, false if otherwise
|
||||
*/
|
||||
private boolean isValidName(String name) {
|
||||
Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");
|
||||
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
|
||||
Matcher m = p.matcher(name);
|
||||
|
||||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
|
||||
*
|
||||
* @param description the name of the table or column
|
||||
* @return true if the name is valid, false if otherwise
|
||||
*/
|
||||
private boolean isValidDescription(String description) {
|
||||
Pattern p = Pattern.compile("\\r|\\n");
|
||||
Matcher m = p.matcher(description);
|
||||
|
||||
return !m.find();
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a new GATK report table with the specified name and description
|
||||
*
|
||||
|
|
@ -128,11 +147,23 @@ public class GATKReportTable {
|
|||
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
|
||||
}
|
||||
|
||||
if (!isValidDescription(tableDescription)) {
|
||||
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
|
||||
}
|
||||
|
||||
this.tableName = tableName;
|
||||
this.tableDescription = tableDescription;
|
||||
this.sortByPrimaryKey = sortByPrimaryKey;
|
||||
|
||||
columns = new LinkedHashMap<String, GATKReportColumn>();
|
||||
columns = new GATKReportColumns();
|
||||
}
|
||||
|
||||
public GATKReportVersion getVersion() {
|
||||
return version;
|
||||
}
|
||||
|
||||
protected void setVersion(GATKReportVersion version) {
|
||||
this.version = version;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -161,6 +192,57 @@ public class GATKReportTable {
|
|||
primaryKeyDisplay = display;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or throws an exception.
|
||||
*/
|
||||
public Object getPrimaryKey(String dottedColumnValues) {
|
||||
Object key = findPrimaryKey(dottedColumnValues);
|
||||
if (key == null)
|
||||
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
|
||||
return key;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if there is at least on row with the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return true if there is at least one row matching the columns.
|
||||
*/
|
||||
public boolean containsPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the dotted column values.
|
||||
* Ex: dbsnp.eval.called.all.novel.all
|
||||
* @param dottedColumnValues Period concatenated values.
|
||||
* @return The first primary key matching the column values or null.
|
||||
*/
|
||||
private Object findPrimaryKey(String dottedColumnValues) {
|
||||
return findPrimaryKey(dottedColumnValues.split("\\."));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the first primary key matching the column values.
|
||||
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
|
||||
* @param columnValues column values.
|
||||
* @return The first primary key matching the column values.
|
||||
*/
|
||||
private Object findPrimaryKey(Object[] columnValues) {
|
||||
for (Object primaryKey : primaryKeyColumn) {
|
||||
boolean matching = true;
|
||||
for (int i = 0; matching && i < columnValues.length; i++) {
|
||||
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
|
||||
}
|
||||
if (matching)
|
||||
return primaryKey;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
|
||||
*
|
||||
|
|
@ -230,6 +312,17 @@ public class GATKReportTable {
|
|||
return columns.get(columnName).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a value from the given position in the table
|
||||
*
|
||||
* @param primaryKey the primary key value
|
||||
* @param columnIndex the index of the column
|
||||
* @return the value stored at the specified position in the table
|
||||
*/
|
||||
private Object get(Object primaryKey, int columnIndex) {
|
||||
return columns.getByIndex(columnIndex).get(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Increment an element in the table. This implementation is awful - a functor would probably be better.
|
||||
*
|
||||
|
|
@ -515,7 +608,7 @@ public class GATKReportTable {
|
|||
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
|
||||
|
||||
// Emit the table definition
|
||||
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
|
||||
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
|
||||
|
||||
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
|
||||
boolean needsPadding = false;
|
||||
|
|
@ -545,22 +638,8 @@ public class GATKReportTable {
|
|||
|
||||
for (String columnName : columns.keySet()) {
|
||||
if (columns.get(columnName).isDisplayable()) {
|
||||
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
|
||||
|
||||
if (needsPadding) { out.printf(" "); }
|
||||
|
||||
String value = "null";
|
||||
if (obj != null) {
|
||||
if (obj instanceof Float) {
|
||||
value = String.format("%.8f", (Float) obj);
|
||||
} else if (obj instanceof Double) {
|
||||
value = String.format("%.8f", (Double) obj);
|
||||
} else {
|
||||
value = obj.toString();
|
||||
}
|
||||
}
|
||||
|
||||
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
|
||||
String value = columns.get(columnName).getStringValue(primaryKey);
|
||||
out.printf(columnWidths.get(columnName), value);
|
||||
|
||||
needsPadding = true;
|
||||
|
|
@ -577,4 +656,16 @@ public class GATKReportTable {
|
|||
public int getNumRows() {
|
||||
return primaryKeyColumn.size();
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return tableName;
|
||||
}
|
||||
|
||||
public String getTableDescription() {
|
||||
return tableDescription;
|
||||
}
|
||||
|
||||
public GATKReportColumns getColumns() {
|
||||
return columns;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,75 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
public class GATKReportTableParser {
|
||||
private int lineNum = 0;
|
||||
private String[] descriptions;
|
||||
private Map<String, Integer> headers = new HashMap<String, Integer>();
|
||||
private List<String[]> values = new ArrayList<String[]>();
|
||||
|
||||
public void parse(String line) {
|
||||
lineNum++;
|
||||
switch (lineNum) {
|
||||
case 1:
|
||||
descriptions = parseLine(line);
|
||||
case 2:
|
||||
String[] columnHeaders = parseLine(line);
|
||||
for (int i = 0; i < columnHeaders.length; i++)
|
||||
headers.put(columnHeaders[i], i);
|
||||
default:
|
||||
values.add(parseLine(line));
|
||||
}
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return descriptions[1];
|
||||
}
|
||||
|
||||
public String getValue(String[] key, String column) {
|
||||
if (!headers.containsKey(column))
|
||||
return null;
|
||||
for (String[] row: values)
|
||||
if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
|
||||
return row[headers.get(column)];
|
||||
return null;
|
||||
}
|
||||
|
||||
public String getValue(String key, String column) {
|
||||
return getValue(key.split("\\."), column);
|
||||
}
|
||||
|
||||
private String generateKey(String[] row, int i) {
|
||||
return StringUtils.join(row, ".", 0, i);
|
||||
}
|
||||
|
||||
private String[] parseLine(String line) {
|
||||
return line.split(" +");
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
public enum GATKReportVersion {
|
||||
/**
|
||||
* Differences between other versions:
|
||||
* - Does not allow spaces in cells.
|
||||
* - Mostly fixed width but has a bug where the string width of floating point
|
||||
* values was not measured correctly leading to columns that aren't aligned
|
||||
*/
|
||||
V0_1("v0.1"),
|
||||
|
||||
/**
|
||||
* Differences between other versions:
|
||||
* - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
|
||||
* - Fixed width fixed for floating point values
|
||||
*/
|
||||
V0_2("v0.2");
|
||||
|
||||
public final String versionString;
|
||||
|
||||
private GATKReportVersion(String versionString) {
|
||||
this.versionString = versionString;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return versionString;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the GATK Report Version from the file header.
|
||||
* @param header Header from the file starting with ##:GATKReport.v[version]
|
||||
* @return The version as an enum.
|
||||
*/
|
||||
public static GATKReportVersion fromHeader(String header) {
|
||||
if (header.startsWith("##:GATKReport.v0.1 "))
|
||||
return GATKReportVersion.V0_1;
|
||||
|
||||
if (header.startsWith("##:GATKReport.v0.2 "))
|
||||
return GATKReportVersion.V0_2;
|
||||
|
||||
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
|
||||
}
|
||||
}
|
||||
|
|
@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
|
||||
|
||||
/** We use the SimpleTimer to time our run */
|
||||
private SimpleTimer timer = new SimpleTimer("Traversal");
|
||||
private SimpleTimer timer = null;
|
||||
|
||||
// How long can we go without printing some progress info?
|
||||
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
|
||||
private int printProgressCheckCounter = 0;
|
||||
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
|
||||
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
|
||||
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
|
||||
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
|
||||
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
|
||||
|
|
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
}
|
||||
}
|
||||
/**
|
||||
* Should be called to indicate that we're going to process records and the timer should start ticking
|
||||
* Should be called to indicate that we're going to process records and the timer should start ticking. This
|
||||
* function should be called right before any traversal work is done, to avoid counting setup costs in the
|
||||
* processing costs and inflating the estimated runtime.
|
||||
*/
|
||||
public void startTimers() {
|
||||
timer.start();
|
||||
lastProgressPrintTime = timer.currentTime();
|
||||
public void startTimersIfNecessary() {
|
||||
if ( timer == null ) {
|
||||
timer = new SimpleTimer("Traversal");
|
||||
timer.start();
|
||||
lastProgressPrintTime = timer.currentTime();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
* @return true if the maximum interval (in millisecs) has passed since the last printing
|
||||
*/
|
||||
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
|
||||
return (curTime - lastPrintTime) > printFreq;
|
||||
long elapsed = curTime - lastPrintTime;
|
||||
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -173,7 +173,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
* -> those with the same mate pair position, for paired reads
|
||||
* -> those flagged as unpaired and duplicated but having the same start and end
|
||||
*/
|
||||
boolean done = walker.isDone();
|
||||
for (SAMRecord read : iter) {
|
||||
if ( done ) break;
|
||||
// get the genome loc from the read
|
||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
|
||||
|
|
@ -194,6 +196,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
}
|
||||
|
||||
printProgress(dataProvider.getShard(),site);
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
return sum;
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
|
||||
|
||||
LocusView locusView = getLocusView( walker, dataProvider );
|
||||
boolean done = false;
|
||||
|
||||
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
|
||||
|
||||
|
|
@ -46,7 +47,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
|
||||
|
||||
// We keep processing while the next reference location is within the interval
|
||||
while( locusView.hasNext() ) {
|
||||
while( locusView.hasNext() && ! done ) {
|
||||
AlignmentContext locus = locusView.next();
|
||||
GenomeLoc location = locus.getLocation();
|
||||
|
||||
|
|
@ -65,26 +66,28 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
|
|||
referenceView.expandBoundsToAccomodateLoc(location);
|
||||
}
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
|
||||
|
||||
// create reference context. Note that if we have a pileup of "extended events", the context will
|
||||
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
|
||||
ReferenceContext refContext = referenceView.getReferenceContext(location);
|
||||
|
||||
// Iterate forward to get all reference ordered data covering this location
|
||||
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
|
||||
|
||||
final boolean keepMeP = walker.filter(tracker, refContext, locus);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(tracker, refContext, locus);
|
||||
sum = walker.reduce(x, sum);
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
printProgress(dataProvider.getShard(),locus.getLocation());
|
||||
}
|
||||
}
|
||||
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
|
||||
// We have a final map call to execute here to clean up the skipped based from the
|
||||
// last position in the ROD to that in the interval
|
||||
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
|
||||
// only do this if the walker isn't done!
|
||||
RodLocusView rodLocusView = (RodLocusView)locusView;
|
||||
long nSkipped = rodLocusView.getLastSkippedBases();
|
||||
if ( nSkipped > 0 ) {
|
||||
|
|
|
|||
|
|
@ -50,7 +50,9 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
|||
ReadView reads = new ReadView(dataProvider);
|
||||
List<SAMRecord> pairs = new ArrayList<SAMRecord>();
|
||||
|
||||
boolean done = walker.isDone();
|
||||
for(SAMRecord read: reads) {
|
||||
if ( done ) break;
|
||||
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
|
||||
|
||||
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
|
||||
|
|
@ -65,6 +67,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
|
|||
|
||||
printProgress(dataProvider.getShard(),null);
|
||||
}
|
||||
|
||||
done = walker.isDone();
|
||||
}
|
||||
|
||||
// If any data was left in the queue, process it.
|
||||
|
|
|
|||
|
|
@ -82,8 +82,10 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
// get the reference ordered data
|
||||
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
|
||||
|
||||
boolean done = walker.isDone();
|
||||
// while we still have more reads
|
||||
for (SAMRecord read : reads) {
|
||||
if ( done ) break;
|
||||
// ReferenceContext -- the reference bases covered by the read
|
||||
ReferenceContext refContext = null;
|
||||
|
||||
|
|
@ -106,6 +108,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
|
||||
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
|
||||
printProgress(dataProvider.getShard(),locus);
|
||||
done = walker.isDone();
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,5 +23,4 @@ import java.lang.annotation.*;
|
|||
@Target(ElementType.TYPE)
|
||||
public @interface Allows {
|
||||
DataSource[] value();
|
||||
RMD[] referenceMetaData() default {};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
|
|||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
||||
|
|
@ -50,44 +52,158 @@ import java.util.regex.Matcher;
|
|||
import java.util.regex.Pattern;
|
||||
|
||||
/**
|
||||
* This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads
|
||||
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
|
||||
* This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.
|
||||
*
|
||||
*
|
||||
* <p>
|
||||
* It allows the user to clip bases in reads with poor quality scores, that match particular
|
||||
* sequences, or that were generated by particular machine cycles.
|
||||
*
|
||||
* <dl>
|
||||
* <dt>Quality score based clipping</dt>
|
||||
* <dd>
|
||||
* Clip bases from the read in clipper from
|
||||
* <br>argmax_x{ \sum{i = x + 1}^l (qTrimmingThreshold - qual)</br>
|
||||
* to the end of the read. This is blatantly stolen from BWA.
|
||||
*
|
||||
* Walk through the read from the end (in machine cycle order) to the beginning, calculating the
|
||||
* running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this
|
||||
* sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the
|
||||
* clipping index in the read (from the end).
|
||||
* </dd>
|
||||
* <dt>Cycle based clipping</dt>
|
||||
* <dd>Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
|
||||
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions).
|
||||
* For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12.
|
||||
* </dd>
|
||||
* <dt>Sequence matching</dt>
|
||||
* <dd>Clips bases from that exactly match one of a number of base sequences. This employs an exact match algorithm,
|
||||
* filtering only bases whose sequence exactly matches SEQ.</dd>
|
||||
* </dl>
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* Any number of BAM files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A new BAM file containing all of the reads from the input BAMs with the user-specified clipping
|
||||
* operation applied to each read.
|
||||
* </p>
|
||||
* <p>
|
||||
* <h3>Summary output</h3>
|
||||
* <pre>
|
||||
* Number of examined reads 13
|
||||
* Number of clipped reads 13
|
||||
* Percent of clipped reads 100.00
|
||||
* Number of examined bases 988
|
||||
* Number of clipped bases 126
|
||||
* Percent of clipped bases 12.75
|
||||
* Number of quality-score clipped bases 126
|
||||
* Number of range clipped bases 0
|
||||
* Number of sequence clipped bases 0
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* <h3>Example clipping</h3>
|
||||
* Suppose we are given this read:
|
||||
* <pre>
|
||||
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||
* </pre>
|
||||
*
|
||||
* If we are clipping reads with -QT 10 and -CR WRITE_NS, we get:
|
||||
*
|
||||
* <pre>
|
||||
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||
* NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||
* </pre>
|
||||
*
|
||||
* Whereas with -CR WRITE_Q0S:
|
||||
* <pre>
|
||||
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
|
||||
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||
* !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||
* </pre>
|
||||
*
|
||||
* Or -CR SOFTCLIP_BASES:
|
||||
* <pre>
|
||||
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3133 29 17S59M * * *
|
||||
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
|
||||
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
|
||||
* </pre>
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
|
||||
* -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
|
||||
* </pre>
|
||||
|
||||
* @author Mark DePristo
|
||||
* @since 2010
|
||||
*/
|
||||
@Requires({DataSource.READS})
|
||||
public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.ClippingData> {
|
||||
@Output
|
||||
PrintStream out;
|
||||
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
|
||||
/**
|
||||
* If provided, ClipReads will write summary statistics about the clipping operations applied
|
||||
* to the reads to this file.
|
||||
*/
|
||||
@Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false)
|
||||
PrintStream out = null;
|
||||
|
||||
/**
|
||||
* an optional argument to dump the reads out to a BAM file
|
||||
* The output SAM/BAM file will be written here
|
||||
*/
|
||||
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
|
||||
StingSAMFileWriter outputBam = null;
|
||||
@Output(doc = "Write BAM output here", required = true)
|
||||
StingSAMFileWriter outputBam;
|
||||
|
||||
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
|
||||
/**
|
||||
* If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this
|
||||
* quality score threshold.
|
||||
*/
|
||||
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false)
|
||||
int qTrimmingThreshold = -1;
|
||||
|
||||
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
|
||||
/**
|
||||
* Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
|
||||
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based
|
||||
* values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11,
|
||||
* and 12.
|
||||
*/
|
||||
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false)
|
||||
String cyclesToClipArg = null;
|
||||
|
||||
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false)
|
||||
/**
|
||||
* Reads the sequences in the provided FASTA file, and clip any bases that exactly match any of the
|
||||
* sequences in the file.
|
||||
*/
|
||||
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false)
|
||||
String clipSequenceFile = null;
|
||||
|
||||
/**
|
||||
* Clips bases from the reads matching the provided SEQ. Can be provided any number of times on the command line
|
||||
*/
|
||||
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
|
||||
String[] clipSequencesArgs = null;
|
||||
|
||||
@Argument(fullName="read", doc="", required=false)
|
||||
String onlyDoRead = null;
|
||||
|
||||
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
|
||||
//boolean keepCompletelyClippedReads = false;
|
||||
|
||||
// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
|
||||
// boolean onlyClipFirstSeqMatch = false;
|
||||
|
||||
/**
|
||||
* The different values for this argument determines how ClipReads applies clips to the reads. This can range
|
||||
* from writing Ns over the clipped bases to hard clipping away the bases from the BAM.
|
||||
*/
|
||||
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
|
||||
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
|
||||
|
||||
@Hidden
|
||||
@Advanced
|
||||
@Argument(fullName="read", doc="", required=false)
|
||||
String onlyDoRead = null;
|
||||
|
||||
/**
|
||||
* List of sequence that should be clipped from the reads
|
||||
|
|
@ -180,12 +296,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
* @param read the read itself, as a SAMRecord
|
||||
* @return the ReadClipper object describing what should be done to clip this read
|
||||
*/
|
||||
public ReadClipper map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
||||
}
|
||||
ReadClipper clipper = new ReadClipper(read);
|
||||
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
|
||||
|
||||
//
|
||||
// run all three clipping modules
|
||||
|
|
@ -205,9 +321,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
*
|
||||
* @param clipper
|
||||
*/
|
||||
private void clipSequences(ReadClipper clipper) {
|
||||
private void clipSequences(ReadClipperWithData clipper) {
|
||||
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
||||
SAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (SeqToClip stc : sequencesToClip) {
|
||||
// we have a pattern for both the forward and the reverse strands
|
||||
|
|
@ -223,11 +340,14 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
if (found) {
|
||||
int start = match.start();
|
||||
int stop = match.end() - 1;
|
||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
||||
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
|
||||
ClippingOp op = new ClippingOp(start, stop);
|
||||
clipper.addOp(op);
|
||||
data.incSeqClippedBases(stc.seq, op.getLength());
|
||||
}
|
||||
}
|
||||
}
|
||||
clipper.setData(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -252,9 +372,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
*
|
||||
* @param clipper
|
||||
*/
|
||||
private void clipCycles(ReadClipper clipper) {
|
||||
private void clipCycles(ReadClipperWithData clipper) {
|
||||
if (cyclesToClip != null) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
||||
int cycleStart = p.first;
|
||||
|
|
@ -270,10 +391,13 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
int start = startStop.first;
|
||||
int stop = startStop.second;
|
||||
|
||||
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
||||
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
|
||||
ClippingOp op = new ClippingOp(start, stop);
|
||||
clipper.addOp(op);
|
||||
data.incNRangeClippedBases(op.getLength());
|
||||
}
|
||||
}
|
||||
clipper.setData(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -291,8 +415,9 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
*
|
||||
* @param clipper
|
||||
*/
|
||||
private void clipBadQualityScores(ReadClipper clipper) {
|
||||
private void clipBadQualityScores(ReadClipperWithData clipper) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
int readLen = read.getReadBases().length;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
||||
|
|
@ -311,8 +436,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
if (clipPoint != -1) {
|
||||
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
|
||||
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
|
||||
clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
||||
//clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
|
||||
ClippingOp op = new ClippingOp(start, stop);
|
||||
clipper.addOp(op);
|
||||
data.incNQClippedBases(op.getLength());
|
||||
}
|
||||
clipper.setData(data);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -325,7 +454,7 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
return new ClippingData(sequencesToClip);
|
||||
}
|
||||
|
||||
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
|
||||
public ClippingData reduce(ReadClipperWithData clipper, ClippingData data) {
|
||||
if ( clipper == null )
|
||||
return data;
|
||||
|
||||
|
|
@ -340,23 +469,8 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
data.nTotalBases += clipper.getRead().getReadLength();
|
||||
if (clipper.wasClipped()) {
|
||||
data.nClippedReads++;
|
||||
for (ClippingOp op : clipper.getOps()) {
|
||||
switch (op.type) {
|
||||
case LOW_Q_SCORES:
|
||||
data.incNQClippedBases(op.getLength());
|
||||
break;
|
||||
case WITHIN_CLIP_RANGE:
|
||||
data.incNRangeClippedBases(op.getLength());
|
||||
break;
|
||||
case MATCHES_CLIP_SEQ:
|
||||
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
|
||||
break;
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected Clipping operator type " + op);
|
||||
}
|
||||
}
|
||||
data.addData(clipper.getData());
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
|
|
@ -417,6 +531,23 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
|
||||
}
|
||||
|
||||
public void addData (ClippingData data) {
|
||||
nTotalReads += data.nTotalReads;
|
||||
nTotalBases += data.nTotalBases;
|
||||
nClippedReads += data.nClippedReads;
|
||||
nClippedBases += data.nClippedBases;
|
||||
nQClippedBases += data.nQClippedBases;
|
||||
nRangeClippedBases += data.nRangeClippedBases;
|
||||
nSeqClippedBases += data.nSeqClippedBases;
|
||||
|
||||
for (String seqClip : data.seqClipCounts.keySet()) {
|
||||
Long count = data.seqClipCounts.get(seqClip);
|
||||
if (seqClipCounts.containsKey(seqClip))
|
||||
count += seqClipCounts.get(seqClip);
|
||||
seqClipCounts.put(seqClip, count);
|
||||
}
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
StringBuilder s = new StringBuilder();
|
||||
|
||||
|
|
@ -439,4 +570,27 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
|
|||
return s.toString();
|
||||
}
|
||||
}
|
||||
|
||||
public class ReadClipperWithData extends ReadClipper {
|
||||
private ClippingData data;
|
||||
|
||||
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
super(read);
|
||||
data = new ClippingData(clipSeqs);
|
||||
}
|
||||
|
||||
public ClippingData getData() {
|
||||
return data;
|
||||
}
|
||||
|
||||
public void setData(ClippingData data) {
|
||||
this.data = data;
|
||||
}
|
||||
|
||||
public void addData(ClippingData data) {
|
||||
this.data.addData(data);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
|
|
@ -17,7 +17,7 @@ import java.util.Set;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE})
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
|
||||
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
||||
|
|
@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
|
|
|
|||
|
|
@ -25,15 +25,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.dbsnp.DbSNPFeature;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
|
|
@ -41,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
|||
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
|
|
@ -68,6 +68,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
|
||||
public boolean SHOW_INDEL_PILEUPS = false;
|
||||
|
||||
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
|
||||
public List<RodBinding<Feature>> rods = Collections.emptyList();
|
||||
|
||||
public void initialize() {
|
||||
}
|
||||
|
||||
|
|
@ -112,18 +115,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
*/
|
||||
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
|
||||
ArrayList<String> rodStrings = new ArrayList<String>();
|
||||
for ( GATKFeature datum : tracker.getAllRods() ) {
|
||||
if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) {
|
||||
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
|
||||
}
|
||||
for ( Feature datum : tracker.getValues(rods) ) {
|
||||
rodStrings.add(datum.toString());
|
||||
}
|
||||
String rodString = Utils.join(", ", rodStrings);
|
||||
|
||||
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
|
||||
|
||||
if ( dbsnp != null)
|
||||
rodString += DbSNPHelper.toMediumString(dbsnp);
|
||||
|
||||
if ( !rodString.equals("") )
|
||||
rodString = "[ROD: " + rodString + "]";
|
||||
|
||||
|
|
@ -132,8 +128,6 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
|
|||
|
||||
@Override
|
||||
public void onTraversalDone(Integer result) {
|
||||
// Double check traversal result to make count is the same.
|
||||
// TODO: Is this check necessary?
|
||||
out.println("[REDUCE RESULT] Traversal result is: " + result);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,21 +25,24 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Iterator;
|
||||
|
||||
/**
|
||||
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
|
||||
* of the given ROD.
|
||||
*/
|
||||
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
||||
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
|
||||
public RodBinding<Feature> input;
|
||||
|
||||
@Output
|
||||
PrintStream out;
|
||||
|
||||
|
|
@ -61,11 +64,8 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
|
|||
if ( tracker == null )
|
||||
return 0;
|
||||
|
||||
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
|
||||
while ( rods.hasNext() ) {
|
||||
Object rod = rods.next().getUnderlyingObject();
|
||||
if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) )
|
||||
out.println(rod.toString());
|
||||
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
|
||||
out.println(feature.toString());
|
||||
}
|
||||
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -40,26 +40,72 @@ import java.util.TreeSet;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
|
||||
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
|
||||
* in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter
|
||||
* command line argument.
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||
*
|
||||
* <p>
|
||||
* PrintReads can dynamically merge the contents of multiple input BAM files, resulting
|
||||
* in merged output sorted in coordinate order. Can also optionally filter reads based on the
|
||||
* --read_filter command line argument.
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* One or more bam files.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* A single processed bam file.
|
||||
* </p>
|
||||
*
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T PrintReads \
|
||||
* -o output.bam \
|
||||
* -I input1.bam \
|
||||
* -I input2.bam \
|
||||
* --read_filter MappingQualityZero
|
||||
*
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T PrintReads \
|
||||
* -o output.bam \
|
||||
* -I input.bam \
|
||||
* -n 2000
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE})
|
||||
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
||||
/** an optional argument to dump the reads out to a BAM file */
|
||||
|
||||
@Output(doc="Write output to this BAM filename instead of STDOUT")
|
||||
SAMFileWriter out;
|
||||
|
||||
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
|
||||
String readGroup = null;
|
||||
|
||||
/**
|
||||
* For example, --platform ILLUMINA or --platform 454.
|
||||
*/
|
||||
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
|
||||
String platform = null; // E.g. ILLUMINA, 454
|
||||
String platform = null;
|
||||
|
||||
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
|
||||
int nReadsToPrint = -1;
|
||||
|
||||
/**
|
||||
* Only reads from samples listed in the provided file(s) will be included in the output.
|
||||
*/
|
||||
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
|
||||
public Set<File> sampleFile = new TreeSet<File>();
|
||||
|
||||
/**
|
||||
* Only reads from the sample(s) will be included in the output.
|
||||
*/
|
||||
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
|
||||
public Set<String> sampleNames = new TreeSet<String>();
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.picard.filter.SamRecordFilter;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.lang.annotation.*;
|
||||
|
||||
|
|
|
|||
|
|
@ -26,11 +26,14 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -44,6 +47,10 @@ import java.util.List;
|
|||
@ReadFilters(MalformedReadFilter.class)
|
||||
@PartitionBy(PartitionType.NONE)
|
||||
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
|
||||
@DocumentedGATKFeature(
|
||||
groupName = "GATK walkers",
|
||||
summary = "General tools available for running on the command line as part of the GATK package",
|
||||
extraDocs = {CommandLineGATK.class})
|
||||
public abstract class Walker<MapType, ReduceType> {
|
||||
final protected static Logger logger = Logger.getLogger(Walker.class);
|
||||
private GenomeAnalysisEngine toolkit;
|
||||
|
|
@ -119,6 +126,17 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
|
||||
public void initialize() { }
|
||||
|
||||
/**
|
||||
* A function for overloading in subclasses providing a mechanism to abort early from a walker.
|
||||
*
|
||||
* If this ever returns true, then the Traversal engine will stop executing map calls
|
||||
* and start the process of shutting down the walker in an orderly fashion.
|
||||
* @return
|
||||
*/
|
||||
public boolean isDone() {
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Provide an initial value for reduce computations.
|
||||
* @return Initial value of reduce.
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -42,9 +43,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class AlleleBalance implements InfoFieldAnnotation {
|
||||
public class AlleleBalance extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
@ -89,7 +90,7 @@ public class AlleleBalance implements InfoFieldAnnotation {
|
|||
}
|
||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||
int refCount = indelPileup.size();
|
||||
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
|
||||
if ( refCount + altCount == 0 ) {
|
||||
continue;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -15,9 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation {
|
||||
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
Double ratio = annotateSNP(stratifiedContext, vc, g);
|
||||
if (ratio == null)
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ import java.util.Map;
|
|||
|
||||
|
||||
|
||||
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
|
||||
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
|
||||
|
||||
|
||||
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
|
|
@ -46,9 +47,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class BaseCounts implements InfoFieldAnnotation {
|
||||
public class BaseCounts extends InfoFieldAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
|
@ -43,14 +44,14 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
|
||||
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
|
||||
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.hasGenotypes() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
|
||||
|
|
@ -16,9 +17,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( stratifiedContexts.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
|
||||
|
|
@ -22,13 +23,13 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation {
|
||||
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
|
||||
|
||||
private static String REF_ALLELE = "REF";
|
||||
|
||||
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
|
||||
if ( g == null || !g.isCalled() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
|
|
@ -42,11 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
import java.util.*;
|
||||
|
||||
|
||||
public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private static final String FS = "FS";
|
||||
private static final double MIN_PVALUE = 1E-320;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if ( ! vc.isVariant() || vc.isFiltered() )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
|
|
@ -16,9 +17,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
|
||||
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
double content = computeGCContent(ref);
|
||||
Map<String, Object> map = new HashMap<String, Object>();
|
||||
map.put(getKeyNames().get(0), String.format("%.2f", content));
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
|
|
@ -23,11 +24,11 @@ import java.util.Map;
|
|||
*/
|
||||
|
||||
// A set of annotations calculated directly from the GLs
|
||||
public class GLstats implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
|
||||
|
|
@ -48,13 +49,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
|
||||
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
private final static boolean DEBUG = false;
|
||||
private final static int MIN_CONTEXT_WING_SIZE = 10;
|
||||
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
|
||||
private final static char REGEXP_WILDCARD = '.';
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
|
||||
return null;
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
|
||||
import org.broadinstitute.sting.utils.QualityUtils;
|
||||
|
|
@ -18,13 +19,13 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
|
||||
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
|
||||
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
|
||||
|
||||
private static final int MIN_SAMPLES = 10;
|
||||
private static final int MIN_GENOTYPE_QUALITY = 10;
|
||||
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
|
||||
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
|
||||
|
||||
final Map<String, Genotype> genotypes = vc.getGenotypes();
|
||||
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue