Merge remote-tracking branch 'unstable/master'

This commit is contained in:
Eric Banks 2011-09-15 21:14:17 -04:00
commit 3cd9f3fe81
434 changed files with 19991 additions and 14388 deletions

477
build.xml
View File

@ -43,16 +43,17 @@
<property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
<property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" />
<!-- Contracts for Java -->
<!-- To disable, run with -Duse.contracts=false -->
<property name="use.contracts" value="true" />
<!-- By default, enabled only for test targets -->
<!-- To disable for test targets, run with -Duse.contracts=false -->
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
<property name="java.contracts" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-20110609" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
<!-- where to find the tribble distro -->
<property name="tribble.dir" value="tribble" />
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
<property name="findbugs.home" value="./findbugs"/>
@ -79,6 +80,16 @@
<patternset refid="java.source.pattern" />
</fileset>
<!-- terrible hack to get gatkdocs to see all files -->
<patternset id="all.java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" />
</patternset>
<fileset id="all.java.source.files" dir="${basedir}">
<patternset refid="all.java.source.pattern" />
</fileset>
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
<include name="**/*.java" />
</fileset>
@ -160,6 +171,16 @@
<property name="scala.target" value="core"/>
</target>
<target name="init.buildpublic">
<!-- Set the properties needed to build public only -->
<property name="gatk.target" value="core"/>
<property name="scala.target" value="core"/>
</target>
<target name="init.usecontracts">
<property name="use.contracts" value="true" />
</target>
<target name="git.describe">
<exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
<arg line="describe" />
@ -234,6 +255,7 @@
<!-- Create the build directory structure used by compile -->
<mkdir dir="${build.dir}"/>
<mkdir dir="${lib.dir}"/>
<mkdir dir="${java.classes}"/>
<mkdir dir="${java.contracts}"/>
@ -265,7 +287,7 @@
</taskdef>
</target>
<target name="gatk.compile.public.source" depends="tribble,init,resolve">
<target name="gatk.compile.public.source" depends="init,resolve">
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/>
</javac>
@ -326,7 +348,7 @@
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
description="create GATK contracts" if="include.contracts" />
<target name="gatk.compile" depends="tribble,init,resolve,gatk.compile.source,gatk.contracts" />
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
<target name="init.queue-extensions.generate" depends="gatk.compile">
<condition property="uptodate.queue-extensions.generate">
@ -457,6 +479,35 @@
</javadoc>
</target>
<target name="clean.gatkdocs">
<delete dir="gatkdocs"/>
</target>
<target name="gatkdocs" depends="gatk.compile"
description="Extract help key/value pair file from the JavaDoc tags.">
<path id="doclet.classpath">
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
</path>
<!-- Run with -Dgatkdocs.include.hidden=true to include documentation for hidden features -->
<condition property="gatkdocs.include.hidden.arg" value="-include-hidden" else="">
<isset property="gatkdocs.include.hidden" />
</condition>
<javadoc doclet="org.broadinstitute.sting.utils.help.GATKDoclet"
docletpathref="doclet.classpath"
classpathref="external.dependencies"
classpath="${java.classes}"
additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> <!-- -test to only do DocumentationTest walker -->
<sourcefiles>
<union>
<fileset refid="all.java.source.files"/>
</union>
</sourcefiles>
</javadoc>
</target>
<target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="init.jar" depends="sting.compile,extracthelp">
@ -490,6 +541,7 @@
<include name="**/utils/codecs/**/*.class"/>
<include name="**/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
</fileset>
</jar>
</target>
@ -657,54 +709,6 @@
</antcall>
</target>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
<echo message="Sting: Compiling test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
<echo message="Scala: Compiling test cases!"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<src path="${scala.private.test.sources}" />
<include name="**/*.scala"/>
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.test.classes}"/>
<pathelement location="${java.test.classes}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
</scalac>
</target>
<target name="test.compile" depends="test.java.compile,test.scala.compile">
</target>
<!-- new scala target -->
<target name="scala" description="build the scala directory">
@ -718,20 +722,113 @@
<!-- ***************************************************************************** -->
<!-- where to put reports and tests-->
<property name="report" value="${build.dir}/report"/>
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="test.output" value="${dist.dir}/test"/>
<property name="java.public.test.sources" value="public/java/test"/>
<property name="java.private.test.sources" value="private/java/test"/>
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="java.public.test.classes" value="${java.test.classes}/public"/>
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
<property name="scala.public.test.sources" value="public/scala/test"/>
<property name="scala.private.test.sources" value="private/scala/test"/>
<property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
<property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
<property name="test.maxmemory" value="4g"/>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.public.compile" depends="dist,test.init.compile">
<mkdir dir="${java.public.test.classes}"/>
<echo message="Sting: Compiling public test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
<mkdir dir="${java.private.test.classes}"/>
<echo message="Sting: Compiling private test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
<mkdir dir="${scala.public.test.classes}"/>
<echo message="Scala: Compiling public test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
<mkdir dir="${scala.private.test.classes}"/>
<echo message="Scala: Compiling private test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.private.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.public.test.classes}"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.private.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
<!-- TEST -->
<macrodef name="run-test">
<attribute name="testtype"/>
<attribute name="outputdir"/>
<attribute name="runfailed"/>
<sequential>
<condition property="run.failed.tests">
<equals arg1="@{runfailed}" arg2="true"/>
</condition>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
@ -741,10 +838,10 @@
<isset property="include.contracts" />
</condition>
<mkdir dir="${report}/@{testtype}"/>
<mkdir dir="@{outputdir}"/>
<echo message="Sting: Running @{testtype} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
<testng outputDir="${report}/@{testtype}"
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
<testng outputDir="@{outputdir}"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
@ -762,199 +859,163 @@
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
<pathelement location="${java.public.test.classes}" />
<pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" />
</classpath>
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}.class" if="include.private"/>
</classfileset>
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="scala.include"/>
</classfileset>
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="include.scala.private"/>
</classfileset>
<xmlfileset dir="${basedir}">
<include name="@{testtype}" if="run.failed.tests"/>
</xmlfileset>
</testng>
<!-- generate a report for Bamboo or Hudson to read in -->
<junitreport todir="${report}/@{testtype}">
<fileset dir="${report}/@{testtype}">
<junitreport todir="@{outputdir}">
<fileset dir="@{outputdir}">
<include name="*/*.xml"/>
</fileset>
<report format="noframes" todir="${report}/@{testtype}"/>
<report format="noframes" todir="@{outputdir}"/>
</junitreport>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<!-- FAILED-TEST -->
<macrodef name="run-failed-test">
<attribute name="xmlfailedtestfile" />
<sequential>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
</condition>
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
<isset property="include.contracts" />
</condition>
<mkdir dir="${report}/failed_rerun" />
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
<testng outputDir="${report}/failed_rerun"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
useDefaultListeners="false"
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
<jvmarg value="-Xmx${test.maxmemory}" />
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
</classpath>
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
</testng>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
<target name="test" depends="test.compile,tribble.test" description="Run unit tests">
<target name="alltests">
<antcall target="test" inheritAll="false"/>
<antcall target="integrationtest" inheritAll="false"/>
<antcall target="pipelinetest" inheritAll="false"/>
</target>
<target name="alltests.public">
<antcall target="test.public" inheritAll="false"/>
<antcall target="integrationtest.public" inheritAll="false"/>
<antcall target="pipelinetest.public" inheritAll="false"/>
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ttype}"/>
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
</target>
<target name="integrationtest" depends="test.compile" description="Run integration tests">
<target name="test.public" depends="init.buildpublic,test"/>
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
<condition property="itype" value="*IntegrationTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${itype}"/>
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
</target>
<target name="performancetest" depends="test.compile" description="Run performance tests">
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
<condition property="ptype" value="*PerformanceTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ptype}"/>
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
</target>
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
<property name="pipeline.run" value="run"/>
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
<target name="failed-test" depends="init.buildall,test.compile">
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-test" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
<target name="failed-integration" depends="init.buildall,test.compile">
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-integration" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
<target name="failed-performance" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-performance" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
<target name="failed-pipeline" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-pipeline" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
</target>
<!-- ***************************************************************************** -->
<!-- *********** Tribble ********* -->
<!-- ***************************************************************************** -->
<target name="tribble.init" description="checks if tribble is available to build from source">
<condition property="tribble.compile.exists">
<available file="${tribble.dir}/build.xml"/>
</condition>
</target>
<!-- compile the library -->
<target name="tribble.compile" description="compiles the tribble library" depends="tribble.init" if="tribble.compile.exists">
<echo message="Building the Tribble Library..."/>
<ant antfile="build.xml" target="all" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- copy the compiled library -->
<target name="tribble.compile.copy" description="Copies the compiled tribble library" depends="tribble.compile" if="tribble.compile.exists">
<copy todir="${lib.dir}">
<fileset dir="${tribble.dir}/dist" includes="*.jar"/>
</copy>
</target>
<!-- copy the precompiled library -->
<target name="tribble.library.copy" description="Copies the precompiled tribble library" depends="tribble.init" unless="tribble.compile.exists">
<echo message="Copying the Tribble Library..."/>
<copy todir="${lib.dir}">
<fileset dir="settings/repository/org.broad" includes="tribble*.jar"/>
</copy>
</target>
<target name="tribble" description="Copies the tribble jar" depends="tribble.compile.copy,tribble.library.copy"/>
<target name="tribble.test.init" description="runs the tribble tests" depends="tribble.init">
<condition property="tribble.test.run">
<and>
<isset property="tribble.compile.exists"/>
<not><isset property="single"/></not>
</and>
</condition>
</target>
<!-- test tribble using the unit tests set in tribble -->
<target name="tribble.test" description="runs the tribble tests" depends="tribble.test.init,tribble.compile" if="tribble.test.run">
<echo message="Testing the Tribble Library..."/>
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- clean tribble -->
<target name="tribble.clean" description="cleans the tribble library" depends="tribble.init" if="tribble.compile.exists">
<echo message="Cleaning the Tribble Library..."/>
<ant antfile="build.xml" target="clean" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- ***************************************************************************** -->
<!-- ******************************************************************************** -->
<!-- Javadoc -->
<!-- ******************************************************************************** -->
<target name="clean.javadoc">
<delete dir="javadoc"/>
<delete dir="scaladoc"/>
<delete dir="${javadoc.dir}" />
</target>
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc">
<mkdir dir="javadoc"/>
<javadoc destdir="javadoc"
classpathref="external.dependencies">
<sourcepath path="${java.public.source.dir}"/>
<sourcepath path="${external.dir}"/>
<target name="init.javadoc">
<mkdir dir="${javadoc.dir}" />
</target>
<target name="javadoc" depends="init.buildpublic,generate.javadoc" description="Generates public javadoc" />
<target name="javadoc.private" depends="init.buildall,generate.javadoc" description="Generates public and private javadoc" />
<target name="generate.javadoc" depends="init.javadoc,resolve">
<javadoc destdir="${javadoc.dir}" classpathref="external.dependencies">
<fileset refid="java.source.files" />
<sourcepath path="${external.dir}" />
</javadoc>
<javadoc destdir="javadoc"
classpathref="external.dependencies">
<sourcepath path="${java.private.source.dir}"/>
<exclude name="**" unless="include.private" />
</javadoc>
<mkdir dir="scaladoc"/>
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.public.source.dir}"/>
<src path="${scala.private.source.dir}"/>
<src path="${queue-extensions.source.dir}"/>
<include name="**/*.scala"/>
</target>
<!-- ******************************************************************************** -->
<!-- Scaladoc -->
<!-- ******************************************************************************** -->
<target name="clean.scaladoc">
<delete dir="${scaladoc.dir}" />
</target>
<target name="init.scaladoc">
<mkdir dir="${scaladoc.dir}" />
</target>
<!-- NOTE: the scaladoc targets require that the environment variable ANT_OPTS has been set to "-Xmx1G" -->
<target name="scaladoc" depends="init.buildpublic,generate.scaladoc" description="Generates public scaladoc -- set ANT_OPTS to -Xmx1G" />
<target name="scaladoc.private" depends="init.buildall,generate.scaladoc" description="Generates public and private scaladoc -- set ANT_OPTS to -Xmx1G" />
<target name="generate.scaladoc" depends="resolve,queue-extensions.generate,init.scala.compile,scala.compile,init.scaladoc">
<scaladoc srcdir="${basedir}" destdir="${scaladoc.dir}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<include name="${scala.public.source.dir}/**/*.scala" />
<include name="${queue-extensions.source.dir}/**/*.scala" />
<include name="${scala.private.source.dir}/**/*.scala" if="include.private" />
</scaladoc>
</target>
<!-- ******************************************************************************** -->
<!-- Release-related tasks -->
<!-- ******************************************************************************** -->
<!-- Unzip all classes from their current locations and assemble them in a staging directory -->
<target name="stage" description="stage files for distribution">
<mkdir dir="staging"/>
@ -1044,7 +1105,7 @@
</findbugs>
</target>
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc">
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<delete dir="out"/>
<delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/>

View File

@ -12,6 +12,9 @@
<dependency org="net.sf" name="picard" rev="latest.integration"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<dependency org="log4j" name="log4j" rev="1.2.15">
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
<exclude org="javax.mail" />
@ -30,6 +33,9 @@
<!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
<!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/>

View File

@ -0,0 +1,169 @@
library(gsalib)
require("ggplot2")
require("gplots")
#
# Standard command line switch. Can be loaded interactively for development
# or executed with RScript
#
# When run via Rscript, args[1] is the input report and args[2] the output
# PDF; interactively, hard-coded development paths are used and outputPDF
# stays NA (plots go to the active device instead).
args = commandArgs(TRUE)
onCMDLine = ! is.na(args[1])
if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA
}
# Axis label suffix and the scale factor from the report's native time
# units to seconds (1/1000, i.e. the report stores milliseconds).
RUNTIME_UNITS = "(sec)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000
#
# Helper function to aggregate all of the jobs in the report across all tables
#
# Collect every job row from all tables of a GATK report into one data
# frame, keeping only the scheduling-related columns.
allJobsFromReport <- function(report) {
    keep <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
    perTable <- lapply(report, function(tbl) tbl[, keep])
    do.call("rbind", perTable)
}
#
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
#
# Gantt-style plot of all jobs: one horizontal segment per job spanning its
# start to done time (relative to the earliest start), colored by analysis.
# sortOverall=TRUE groups jobs by analysis name before start time;
# FALSE sorts purely by start time.
plotJobsGantt <- function(gatkReport, sortOverall) {
allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) {
title = "All jobs, by analysis, by start time"
allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
} else {
title = "All jobs, sorted by start time"
allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
}
# y position for each job's segment, assigned after sorting
allJobs$index = 1:nrow(allJobs)
# shift times so the earliest job starts at 0
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
# label each segment at its end; hjust=-0.2 pushes text right of the arrow
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
# 10% x headroom so the right-hand labels are not clipped
p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
p <- p + ylab("Job")
# NOTE(review): opts() is the pre-0.9 ggplot2 API — assumes an old ggplot2
p <- p + opts(title=title)
print(p)
}
#
# Plots scheduling efficiency at job events
#
# Plot scheduling efficiency over time: at every job start/done event,
# count how many jobs are pending, running, and done, and draw the three
# counts as faceted line plots.
plotProgressByTime <- function(gatkReport) {
allJobs = allJobsFromReport(gatkReport)
nJobs = dim(allJobs)[1]
allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
allJobs$index = 1:nrow(allJobs)
# times relative to the first job's start
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
# every event timestamp (each job contributes a start and a done time)
times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
# count, at each event time, the jobs matching predicate p(start, end, t)
countJobs <- function(p) {
s = allJobs$relStartTime
e = allJobs$relDoneTime
x = c() # I wish I knew how to make this work with apply
for ( time in times )
x = c(x, sum(p(s, e, time)))
x
}
pending = countJobs(function(s, e, t) s > t)
done = countJobs(function(s, e, t) e < t)
running = nJobs - pending - done
d = data.frame(times=times, pending=pending, running=running, done=done)
# melt() comes from the reshape package — presumably pulled in via
# ggplot2's dependencies at this vintage; TODO confirm
p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
p <- p + facet_grid(variable ~ ., scales="free")
p <- p + geom_line(size=2)
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
p <- p + opts(title = "Job scheduling")
print(p)
}
#
# Creates tables for each job in this group
#
# Columns present in every job table; anything else is treated as a
# group-specific annotation (e.g. iteration) by plotGroup below.
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
# Emit summary tables/plots for one analysis group: a full itemization, a
# per-iteration summary, a runtime histogram, and an over-iterations mean/sd.
# Uses textplot (gplots) and cast/melt (reshape) for the tabular pages.
plotGroup <- function(groupTable) {
name = unique(groupTable$analysisName)[1]
groupAnnotations = setdiff(names(groupTable), standardColumns)
sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
# NOTE(review): assumes an "iteration" annotation column exists — TODO confirm
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
# create a table showing each job and all annotations
textplot(sub, show.rownames=F)
title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name))
print(p)
}
# as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) {
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}
}
# print out some useful basic information
print("Report")
print(paste("Project :", inputFileName))
# Rescale the time columns of every group table in the report by
# ORIGINAL_UNITS_TO_SECONDS (1/1000, i.e. milliseconds -> seconds).
convertUnits <- function(gatkReportData) {
    rescale <- function(tbl) {
        for (col in c("runtime", "startTime", "doneTime"))
            tbl[[col]] = tbl[[col]] * ORIGINAL_UNITS_TO_SECONDS
        tbl
    }
    lapply(gatkReportData, rescale)
}
# read the table
gatkReportData <- gsa.read.gatkreport(inputFileName)
# convert all time columns to seconds before plotting
gatkReportData <- convertUnits(gatkReportData)
#print(summary(gatkReportData))
# when an output PDF was given on the command line, render all plots there
if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11)
}
plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F)
plotProgressByTime(gatkReportData)
# one set of summary pages per analysis group table
for ( group in gatkReportData ) {
plotGroup(group)
}
if ( ! is.na(outputPDF) ) {
dev.off()
}

View File

@ -20,6 +20,20 @@
assign(tableName, d, envir=tableEnv);
}
# Split one fixed-width report line into its fields, given the start column
# of every field after the first, trimming surrounding whitespace from each.
.gsa.splitFixedWidth <- function(line, columnStarts) {
    fieldStarts = c(1, columnStarts);
    fieldStops = c(columnStarts - 1, nchar(line));
    extractFields <- function(x) {
        fields = substring(x, fieldStarts, fieldStops);
        gsub("^[[:space:]]+|[[:space:]]+$", "", fields);
    }
    # sapply produces one column per input string; keep only the first line's
    # fields, exactly as the original implementation did
    sapply(line, extractFields)[,1];
}
# Load all GATKReport tables from a file
gsa.read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE);
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
tableName = NA;
tableHeader = c();
tableRows = c();
version = NA;
for (line in lines) {
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) {
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
tableName = headerFields[2];
tableHeader = c();
tableRows = c();
# For differences in versions see
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.1";
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.2";
columnStarts = c();
}
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing
} else if (!is.na(tableName)) {
row = unlist(strsplit(line, "[[:space:]]+"));
if (version == "v0.1") {
row = unlist(strsplit(line, "[[:space:]]+"));
} else if (version == "v0.2") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find the first position of non space characters, excluding the first character
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) {
tableHeader = row;
tableHeader = row;
} else {
tableRows = rbind(tableRows, row);
}

View File

@ -25,7 +25,6 @@
package net.sf.picard.reference;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
* Produces fai file with same output as samtools faidx
*/
public class FastaSequenceIndexBuilder {
public File fastaFile;
ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file
final public File fastaFile;
final boolean printProgress;
// keep track of location in file
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) {
this.progress = progress;
public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
this.fastaFile = fastaFile;
fileLength = fastaFile.length();
this.printProgress = printProgress;
}
/**
@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
if (System.currentTimeMillis() - lastTimestamp > 10000) {
int percentProgress = (int) (100*bytesRead/fileLength);
if (progress != null)
progress.percentProgress(percentProgress);
if (printProgress)
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
lastTimestamp = System.currentTimeMillis();
}
}

View File

@ -31,30 +31,85 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: Dec 1, 2009
* Call R scripts to plot residual error versus the various covariates.
*
* <p>
* After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
* reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
* show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
* In order to show that any biases in the reported quality scores have been generally fixed through recalibration one should run
* CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
* by pre-recalibration and post-recalibration .csv files. Our usual chain of commands that we use to generate plots of residual
* error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
* bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
*
* <p>
* The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
* each of the quality score estimates. It is defined as follows for N, the number of observations:
*
* <ul>
* <li>light blue means N < 1,000</li>
* <li>cornflower blue means 1,000 <= N < 10,000</li>
* <li>dark blue means N >= 10,000</li>
* <li>The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
* meaningless and so aren't included in any of the numerical calculations.</li>
* </ul>
*
* <p>
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
*
* <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
*
* <h2>Input</h2>
* <p>
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \
* -resources resources/ \
* -ignoreQ 5
* </pre>
*
* Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
*/
@DocumentedGATKFeature(
groupName = "AnalyzeCovariates",
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram {
/////////////////////////////
// Command Line Arguments
/////////////////////////////
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data- that is, number of observations for this combination of covariates, number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
@ -65,13 +120,22 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
/**
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
* by capping at the specified value. We've found that Q40 is too low when using a more completely database of known variation like dbSNP build 132 or later.
*/
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
private int MAX_QUALITY_SCORE = 50;
/**
* This argument is useful for comparing before/after plots and you want the axes to match each other.
*/
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
private int MAX_HISTOGRAM_VALUE = 0;
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots")
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
private boolean DO_INDEL_QUALITY = false;
@ -261,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram {
}
private void callRScripts() {
RScriptExecutor.RScriptArgumentCollection argumentCollection =
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0;
// for each read group
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
Process p;
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString();
@ -276,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram {
// for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii);
try {
if (DO_INDEL_QUALITY) {
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " +
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
p.waitFor();
} else {
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
if (DO_INDEL_QUALITY) {
executor.callRScripts("plot_indelQuality.R", outputFilename,
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
} else {
if( iii == 1 ) {
// Analyze reported quality
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " +
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
p.waitFor();
} else { // Analyze all other covariates
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
p.waitFor();
}
// Analyze reported quality
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
} else { // Analyze all other covariates
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
}
} catch (InterruptedException e) {
e.printStackTrace();
System.exit(-1);
} catch (IOException e) {
System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option.");
e.printStackTrace();
System.exit(-1);
}
}
} else { // at the maximum number of read groups so break out

View File

@ -0,0 +1,4 @@
/**
* Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
*/
package org.broadinstitute.sting.analyzecovariates;

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.lang.annotation.*;
/**
 * Indicates that a walker argument is considered an advanced option.
 *
 * @author Mark DePristo
 * @version 0.1
 */
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE,ElementType.FIELD})
public @interface Advanced {
}

View File

@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
public boolean matches( ArgumentDefinition definition, Object key ) {
return definition.validation != null;
// We can perform some sort of validation for anything that isn't a flag.
return !definition.isFlag;
}
};
}

View File

@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
public final String label;
/**
* Maps indicies of command line arguments to values paired with that argument.
* Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();

View File

@ -151,6 +151,14 @@ public class ArgumentSource {
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
}
/**
* Is the given argument considered an advanced option when displaying on the command-line argument system.
* @return True if so. False otherwise.
*/
public boolean isAdvanced() {
return field.isAnnotationPresent(Advanced.class);
}
/**
* Is this command-line argument dependent on some primitive argument types?
* @return True if this command-line argument depends on other arguments; false otherwise.
@ -175,13 +183,17 @@ public class ArgumentSource {
return typeDescriptor.createsTypeDefault(this);
}
public String typeDefaultDocString() {
return typeDescriptor.typeDefaultDocString(this);
}
/**
* Generates a default for the given type.
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @return A default value for the given type.
*/
public Object createTypeDefault(ParsingEngine parsingEngine) {
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType());
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType());
}
/**

View File

@ -26,6 +26,8 @@
package org.broadinstitute.sting.commandline;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Multiplex;
import org.broadinstitute.sting.gatk.walkers.Multiplexer;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.lang.annotation.Annotation;
import java.lang.reflect.*;
import java.util.*;
@ -80,14 +83,26 @@ public abstract class ArgumentTypeDescriptor {
*/
public boolean createsTypeDefault(ArgumentSource source) { return false; }
/**
* Returns a documentation-friendly value for the default of a type descriptor.
* Must be overridden if createsTypeDefault return true. cannot be called otherwise
* @param source Source of the command-line argument.
* @return Friendly string of the default value, for documentation. If doesn't create a default, throws
* and UnsupportedOperationException
*/
public String typeDefaultDocString(ArgumentSource source) {
throw new UnsupportedOperationException();
}
/**
* Generates a default for the given type.
*
* @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @param source Source of the command-line argument.
* @param type Type of value to create, in case the command-line argument system wants influence.
* @return A default value for the given type.
*/
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
/**
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
@ -109,7 +124,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parsed object.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
return parse(parsingEngine, source, source.field.getType(), matches);
return parse(parsingEngine, source, source.field.getGenericType(), matches);
}
/**
@ -131,18 +146,18 @@ public abstract class ArgumentTypeDescriptor {
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
Annotation argumentAnnotation = getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
source.field.getType(),
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
ArgumentDefinition.getShortName(argumentAnnotation),
ArgumentDefinition.getDoc(argumentAnnotation),
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
source.isFlag(),
source.isMultiValued(),
source.isHidden(),
getCollectionComponentType(source.field),
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
ArgumentDefinition.getValidationRegex(argumentAnnotation),
getValidOptions(source) );
source.field.getType(),
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
ArgumentDefinition.getShortName(argumentAnnotation),
ArgumentDefinition.getDoc(argumentAnnotation),
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
source.isFlag(),
source.isMultiValued(),
source.isHidden(),
makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
ArgumentDefinition.getExclusiveOf(argumentAnnotation),
ArgumentDefinition.getValidationRegex(argumentAnnotation),
getValidOptions(source) );
}
/**
@ -151,7 +166,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parameterized component type, or String.class if the parameterized type could not be found.
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
protected Class getCollectionComponentType( Field field ) {
protected Type getCollectionComponentType( Field field ) {
return null;
}
@ -162,7 +177,7 @@ public abstract class ArgumentTypeDescriptor {
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type.
*/
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches );
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
/**
* If the argument source only accepts a small set of options, populate the returned list with
@ -273,6 +288,123 @@ public abstract class ArgumentTypeDescriptor {
public static boolean isArgumentHidden(Field field) {
return field.isAnnotationPresent(Hidden.class);
}
public Class makeRawTypeIfNecessary(Type t) {
if ( t == null )
return null;
else if ( t instanceof ParameterizedType )
return (Class)((ParameterizedType) t).getRawType();
else if ( t instanceof Class ) {
return (Class)t;
} else {
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
}
}
}
/**
 * Parser for RodBinding command-line arguments.
 *
 * Supports the -X:name,type and -X:type tag styles, and falls back to
 * dynamically detecting the tribble type from the file itself when no
 * type tag is supplied.
 */
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
    /**
     * We only want RodBinding class objects
     * @param type The type to check.
     * @return true if the provided class is a RodBinding.class
     */
    @Override
    public boolean supports( Class type ) {
        return isRodBinding(type);
    }

    /**
     * @param type The type to check.
     * @return true if the provided class is RodBinding or a subclass of it.
     */
    public static boolean isRodBinding( Class type ) {
        return RodBinding.class.isAssignableFrom(type);
    }

    /** Optional rod bindings receive an unbound default instead of being required. */
    @Override
    public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }

    @Override
    public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
        // The Feature subtype is the generic parameter of the RodBinding field.
        Class parameterType = JVMUtils.getParameterizedTypeClass(type);
        return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
    }

    @Override
    public String typeDefaultDocString(ArgumentSource source) {
        return "none";
    }

    /**
     * Parse a single RodBinding argument, resolving its binding name and
     * tribble type from the positional tags or, failing that, from the file
     * contents, then construct and register the binding.
     *
     * @param parsingEngine engine used to register the resulting binding and its tags.
     * @param source Source of the command-line argument.
     * @param type The full generic type of the RodBinding field.
     * @param matches The argument matches for this source.
     * @return The constructed RodBinding instance.
     */
    @Override
    public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
        ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
        String value = getArgumentValue( defaultDefinition, matches );
        Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);

        try {
            String name = defaultDefinition.fullName;
            String tribbleType = null;
            Tags tags = getArgumentTags(matches);

            // must have zero, one, or two positional tag values here
            if ( tags.getPositionalTags().size() > 2 ) {
                // NOTE(review): format args are (value, field name), so the message
                // reads "argument <value> : <field>" -- confirm the intended order
                throw new UserException.CommandLineException(
                        String.format("Unexpected number of positional tags for argument %s : %s. " +
                                "Rod bindings only support -X:type and -X:name,type argument styles",
                                value, source.field.getName()));
            } else if ( tags.getPositionalTags().size() == 2 ) {
                // -X:name,type style
                name = tags.getPositionalTags().get(0);
                tribbleType = tags.getPositionalTags().get(1);
            } else {
                // case with 0 or 1 positional tags
                FeatureManager manager = new FeatureManager();

                // -X:type style is a type when we cannot determine the type dynamically
                String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
                if ( tag1 != null ) {
                    if ( manager.getByName(tag1) != null ) // this is a known type name
                        tribbleType = tag1;
                    else
                        name = tag1;
                }

                if ( tribbleType == null ) {
                    // try to determine the file type dynamically
                    File file = new File(value);
                    if ( file.canRead() && file.isFile() ) {
                        FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
                        if ( featureDescriptor != null ) {
                            tribbleType = featureDescriptor.getName();
                            logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
                        }
                    }

                    if ( tribbleType == null ) {
                        // short-circuit || (original used non-short-circuit |); both
                        // checks are side-effect free so behavior is unchanged
                        if ( ! file.canRead() || ! file.isFile() ) {
                            throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
                        } else {
                            throw new UserException.CommandLineException(
                                    String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
                                            "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
                                            manager.userFriendlyListOfAvailableFeatures(parameterType)));
                        }
                    }
                }
            }

            // All concrete RodBinding types are expected to share this 5-arg constructor.
            Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
            RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
            parsingEngine.addTags(result,tags);
            parsingEngine.addRodBinding(result);
            return result;
        } catch (InvocationTargetException e) {
            throw new UserException.CommandLineException(
                    String.format("Failed to parse value %s for argument %s.",
                            value, source.field.getName()));
        } catch (Exception e) {
            throw new UserException.CommandLineException(
                    String.format("Failed to parse value %s for argument %s. Message: %s",
                            value, source.field.getName(), e.getMessage()));
        }
    }
}
/**
@ -282,9 +414,10 @@ public abstract class ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
if( type.isPrimitive() ) return true;
if( type.isEnum() ) return true;
if( primitiveToWrapperMap.containsValue(type) ) return true;
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true;
try {
type.getConstructor(String.class);
@ -298,7 +431,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
Class type = makeRawTypeIfNecessary(fulltype);
if (source.isFlag())
return true;
@ -339,7 +473,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw e;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
value, source.field.getName()));
value, source.field.getName()));
} catch (Exception e) {
throw new DynamicClassResolutionException(String.class, e);
}
@ -351,7 +485,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return result;
}
/**
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
@ -382,10 +516,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
@SuppressWarnings("unchecked")
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) {
Class componentType;
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
Class type = makeRawTypeIfNecessary(fulltype);
Type componentType;
Object result;
Tags tags;
if( Collection.class.isAssignableFrom(type) ) {
@ -399,7 +533,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
componentType = getCollectionComponentType( source.field );
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
Collection collection;
try {
@ -428,7 +562,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
else if( type.isArray() ) {
componentType = type.getComponentType();
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType);
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
// Assemble a collection of individual values used in this computation.
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
@ -436,7 +570,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
for( ArgumentMatch value: match )
values.add(value);
result = Array.newInstance(componentType,values.size());
result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
int i = 0;
for( ArgumentMatch value: values ) {
@ -459,16 +593,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
@Override
protected Class getCollectionComponentType( Field field ) {
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
return (Class)parameterizedType.getActualTypeArguments()[0];
}
else
return String.class;
protected Type getCollectionComponentType( Field field ) {
// If this is a parameterized collection, find the contained type. If blow up if more than one type exists.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
return parameterizedType.getActualTypeArguments()[0];
}
else
return String.class;
}
}
@ -510,12 +644,12 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(multiplexer == null || multiplexedIds == null)
throw new ReviewedStingException("No multiplexed ids available");
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
Class componentType = getCollectionComponentType(source.field);
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
for(Object id: multiplexedIds) {
@ -527,15 +661,19 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return multiplexedMapping;
}
@Override
public String typeDefaultDocString(ArgumentSource source) {
// A multiplexed argument has no single printable default value to document.
final String noDefaultDescription = "None";
return noDefaultDescription;
}
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) {
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
if(multiplexedIds == null)
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
Class componentType = getCollectionComponentType(source.field);
Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
for(Object id: multiplexedIds) {
@ -606,7 +744,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/
@Override
protected Class getCollectionComponentType( Field field ) {
protected Type getCollectionComponentType( Field field ) {
// Multiplex arguments must resolve to maps from which the clp should extract the second type.
if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();

View File

@ -43,7 +43,7 @@ import java.util.Locale;
public abstract class CommandLineProgram {
/** The command-line program and the arguments it returned. */
protected ParsingEngine parser = null;
public ParsingEngine parser = null;
/** the default log level */
@Argument(fullName = "logging_level",
@ -144,6 +144,11 @@ public abstract class CommandLineProgram {
public static int result = -1;
@SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) throws Exception {
start(clp, args, false);
}
/**
* This function is called to start processing the command line, and kick
* off the execute message of the program.
@ -153,7 +158,7 @@ public abstract class CommandLineProgram {
* @throws Exception when an exception occurs
*/
@SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) throws Exception {
public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception {
try {
// setup our log layout
@ -180,8 +185,9 @@ public abstract class CommandLineProgram {
// - InvalidArgument in case these arguments are specified by plugins.
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've
// determined the full complement of arguments.
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
ParsingEngine.ValidationType.InvalidArgument));
if ( ! dryRun )
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
ParsingEngine.ValidationType.InvalidArgument));
parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line.
@ -195,36 +201,40 @@ public abstract class CommandLineProgram {
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
if ( ! dryRun ) parser.validate();
} else {
parser.parse(args);
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
if ( ! dryRun ) {
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate();
parser.validate();
}
parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line.
clp.setupLoggerLevel(layout);
}
// if they specify a log location, output our data there
if (clp.toFile != null) {
FileAppender appender;
try {
appender = new FileAppender(layout, clp.toFile, false);
logger.addAppender(appender);
} catch (IOException e) {
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
if ( ! dryRun ) {
// if they specify a log location, output our data there
if (clp.toFile != null) {
FileAppender appender;
try {
appender = new FileAppender(layout, clp.toFile, false);
logger.addAppender(appender);
} catch (IOException e) {
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
}
}
// regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
// call the execute
CommandLineProgram.result = clp.execute();
}
// regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
// call the execute
CommandLineProgram.result = clp.execute();
}
catch (ArgumentException e) {
clp.parser.printHelp(clp.getApplicationDetails());

View File

@ -55,7 +55,7 @@ public @interface Output {
* --help argument is specified.
* @return Doc string associated with this command-line argument.
*/
String doc() default "An output file presented to the walker. Will overwrite contents if file exists.";
String doc() default "An output file created by the walker. Will overwrite contents if file exists";
/**
* Is this argument required. If true, the command-line argument system will

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -41,11 +42,16 @@ import java.util.*;
* A parser for Sting command-line arguments.
*/
public class ParsingEngine {
/**
* The loaded argument sources along with their back definitions.
*/
private Map<ArgumentDefinition,ArgumentSource> argumentSourcesByDefinition = new HashMap<ArgumentDefinition,ArgumentSource>();
/**
* A list of defined arguments against which command lines are matched.
* Package protected for testing access.
*/
ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
/**
* A list of matches from defined arguments to command-line text.
@ -59,11 +65,17 @@ public class ParsingEngine {
*/
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
/**
* All of the RodBinding objects we've seen while parsing
*/
private List<RodBinding> rodBindings = new ArrayList<RodBinding>();
/**
* Class reference to the different types of descriptors that the create method can create.
* The type of set used must be ordered (but not necessarily sorted).
*/
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) );
@ -80,6 +92,7 @@ public class ParsingEngine {
protected static Logger logger = Logger.getLogger(ParsingEngine.class);
public ParsingEngine( CommandLineProgram clp ) {
RodBinding.resetNameCounter();
parsingMethods.add( ParsingMethod.FullNameParsingMethod );
parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
@ -107,8 +120,13 @@ public class ParsingEngine {
*/
public void addArgumentSource( String sourceName, Class sourceClass ) {
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) )
argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() );
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
List<ArgumentDefinition> argumentDefinitions = argumentSource.createArgumentDefinitions();
for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
argumentsFromSource.add( argumentDefinition );
}
}
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
}
@ -199,16 +217,25 @@ public class ParsingEngine {
throw new InvalidArgumentException( invalidArguments );
}
// Find invalid argument values (arguments that fail the regexp test.
// Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
Collection<ArgumentDefinition> verifiableArguments =
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
// Check to see whether an argument value was specified. Argument values must be provided
// when the argument name is specified and the argument is not a flag type.
for(ArgumentMatch verifiableMatch: verifiableMatches) {
ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
invalidValues.add(new Pair<ArgumentDefinition,String>(verifiableArgument,null));
}
// Ensure that the field contents meet the validation criteria specified by the regular expression.
for( ArgumentMatch verifiableMatch: verifiableMatches ) {
for( String value: verifiableMatch.values() ) {
if( !value.matches(verifiableArgument.validation) )
if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
}
}
@ -304,7 +331,17 @@ public class ParsingEngine {
if(!tags.containsKey(key))
return new Tags();
return tags.get(key);
}
}
/**
* Add a RodBinding type argument to this parser. Called during parsing so that
* every RodBinding discovered on the command line is tracked and can later be
* retrieved as a group (see getRodBindings()).
* @param rodBinding the RodBinding to track. Must not be null and must not be added twice
*/
@Requires("rodBinding != null")
public void addRodBinding(final RodBinding rodBinding) {
rodBindings.add(rodBinding);
}
/**
* Notify the user that a deprecated command-line argument has been used.
@ -327,7 +364,7 @@ public class ParsingEngine {
*/
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
// Nothing to load
if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired()))
if( argumentMatches.size() == 0 && ! source.createsTypeDefault() )
return;
// Target instance into which to inject the value.
@ -344,6 +381,10 @@ public class ParsingEngine {
}
}
/**
* @return an unmodifiable view of all RodBindings recorded via addRodBinding()
*         while parsing the command line
*/
public Collection<RodBinding> getRodBindings() {
return Collections.unmodifiableCollection(rodBindings);
}
/**
* Gets a collection of the container instances of the given type stored within the given target.
* @param source Argument source.
@ -390,7 +431,6 @@ public class ParsingEngine {
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
}
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
// now simply call into the truly general routine extract argument bindings but with a null
// object so bindings aren't computed
@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
StringBuilder sb = new StringBuilder();
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
invalidValue.first.fullName,
invalidValue.second,
invalidValue.first.validation) );
if(invalidValue.getSecond() == null)
sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
invalidValue.first.fullName) );
else
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
invalidValue.first.fullName,
invalidValue.second,
invalidValue.first.validation) );
}
return sb.toString();
}

View File

@ -0,0 +1,187 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import java.util.*;
/**
* A RodBinding representing a walker argument that gets bound to a ROD track.
*
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
* is explicitly typed with type of the Tribble.Feature expected to be produced by this
* argument. The GATK Engine takes care of initializing the binding and connecting it
* to the RMD system.
*
* It is recommended that optional RodBindings be initialized to the value returned
* by the static method makeUnbound().
*
* Note that this class is immutable.
*/
public final class RodBinding<T extends Feature> {
protected final static String UNBOUND_VARIABLE_NAME = "";
protected final static String UNBOUND_SOURCE = "UNBOUND";
protected final static String UNBOUND_TRIBBLE_TYPE = "";
/**
* Create an unbound RodBinding of the given type. This is the correct programming
* style for an optional RodBinding<T>, e.g. at the argument declaration site:
*
*   RodBinding<T> x = RodBinding.makeUnbound(T.class)
*
* The unbound binding is guaranteed to never match any binding. It uniquely
* returns false to isBound().
*
* @param type the Class type produced by this unbound object
* @param <T> any class extending Tribble Feature
* @return the UNBOUND RodBinding producing objects of type T
*/
@Requires("type != null")
protected final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
return new RodBinding<T>(type);
}
/** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */
final private String name;
/** Where the data for this ROD is coming from. A file, or a special value if coming from stdin */
final private String source;
/** The string name of the tribble type, such as vcf, bed, etc. */
final private String tribbleType;
/** The command line tags associated with this RodBinding */
final private Tags tags;
/** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */
final private Class<T> type;
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
final private boolean bound;
/**
* The name counter. This is how we create unique names for collections of RodBindings
* on the command line. If you provide the GATK with -X file1 and -X file2 for a
* RodBinding argument declared as List<RodBinding<T>>, then the bindings automatically
* receive the names X and X2, respectively.
*/
final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
/** For unit tests: clears the shared static name counter so generated names are reproducible. */
final public static void resetNameCounter() {
nameCounter.clear();
}
/**
* Produce a unique variable name derived from rawName: the first use returns rawName
* itself; each subsequent use appends an increasing numeric suffix (X, X2, X3, ...).
* Synchronized because the backing counter map is shared static state.
*/
@Requires("rawName != null")
@Ensures("result != null")
final private static synchronized String countedVariableName(final String rawName) {
Integer count = nameCounter.get(rawName);
if ( count == null ) {
nameCounter.put(rawName, 1);
return rawName;
} else {
nameCounter.put(rawName, count + 1);
return rawName + (count + 1);
}
}
/**
* Create a bound RodBinding.
*
* @param type the Java class expected for this binding; must correspond to the type emitted by Tribble
* @param rawName the requested binding name; may receive a numeric suffix to keep names unique
* @param source where the data for this ROD comes from (a file, or a special value for stdin)
* @param tribbleType the string name of the tribble type, such as vcf, bed, etc.
* @param tags the command line tags associated with this binding
*/
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
this.type = type;
this.name = countedVariableName(rawName);
this.source = source;
this.tribbleType = tribbleType;
this.tags = tags;
this.bound = true;
}
/**
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
* @param type class this unbound RodBinding creates
*/
@Requires({"type != null"})
private RodBinding(Class<T> type) {
this.type = type;
this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker
this.source = UNBOUND_SOURCE;
this.tribbleType = UNBOUND_TRIBBLE_TYPE;
this.tags = new Tags();
this.bound = false;
}
/**
* @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
*/
final public boolean isBound() {
return bound;
}
/**
* @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline
*/
@Ensures({"result != null"})
final public String getName() {
return name;
}
/**
* @return The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble
*/
@Ensures({"result != null"})
final public Class<T> getType() {
return type;
}
/**
* @return where the data for this ROD is coming from. A file or special value if coming from stdin
*/
@Ensures({"result != null"})
final public String getSource() {
return source;
}
/**
* @return The command line tags associated with this RodBinding. Will include the tags used to
* determine the name and type of this RodBinding
*/
@Ensures({"result != null"})
final public Tags getTags() {
return tags;
}
/**
* @return the string name of the tribble type, such as vcf, bed, etc.
*/
@Ensures({"result != null"})
final public String getTribbleType() {
return tribbleType;
}
@Override
public String toString() {
return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
}
}

View File

@ -25,21 +25,20 @@
package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.text.ListFileUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.*;
/**
* @author aaron
@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/
private final Collection<Object> argumentSources = new ArrayList<Object>();
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
/**
* this is the function that the inheriting class can expect to have called
* when the command line system has initialized.
@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
// File lists can require a bit of additional expansion. Set these explicitly by the engine.
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
engine.setWalker(walker);
walker.setToolkit(engine);
@ -96,6 +96,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
loadArgumentsIntoObject(walker);
argumentSources.add(walker);
Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
// todo: remove me when the old style system is removed
if ( getArgumentCollection().RODBindings.size() > 0 ) {
logger.warn("################################################################################");
logger.warn("################################################################################");
logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2.");
logger.warn("Please use arguments defined by each specific walker instead.");
for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
}
logger.warn("################################################################################");
logger.warn("################################################################################");
System.exit(1);
}
engine.setReferenceMetaDataFiles(rodBindings);
for (ReadFilter filter: filters) {
loadArgumentsIntoObject(filter);
argumentSources.add(filter);
@ -112,6 +130,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0;
}
/**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
* This report will be written to either STDOUT or to the run repository, depending on the options
@ -142,7 +161,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
new SAMFileReaderArgumentTypeDescriptor(engine),
new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
new OutputStreamArgumentTypeDescriptor(engine,System.out) );
}

View File

@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.*;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
/**
* @author aaron
* @version 1.0
* @date May 8, 2009
* <p/>
* Class CommandLineGATK
* <p/>
* The GATK engine itself. Manages map/reduce data access and runs walkers.
*
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
* the gatk engine should deal with any data related information.
*/
@DocumentedGATKFeature(
groupName = "GATK Engine",
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
extraDocs = { UserException.class })
public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null;
@ -173,12 +175,12 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
formatter.format("Description:%n");
formatter.format("Available Reference Ordered Data types:%n");
formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
formatter.format("%n");
WalkerManager walkerManager = engine.getWalkerManager();
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType);
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
formatter.format("For a full description of this walker, see its GATKdocs at:%n");
formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
return additionalHelp.toString();
}
@ -192,8 +194,6 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp);
formatter.format("Available analyses:%n");
// Get the list of walker names from the walker manager.
WalkerManager walkerManager = engine.getWalkerManager();

View File

@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.*;
@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
throw new ArgumentException("Walker does not allow a reference but one was provided.");
}
/**
* Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
* 'allowed' is still present.
*
* @param rods Reference-ordered data to load.
*/
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
// Check to make sure that all required metadata is present.
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
for (RMD required : allRequired) {
boolean found = false;
for (ReferenceOrderedDataSource rod : rods) {
if (rod.matchesNameAndRecordType(required.name(), required.type()))
found = true;
}
if (!found)
throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
"Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
}
// Check to see that no forbidden rods are present.
for (ReferenceOrderedDataSource rod : rods) {
if (!WalkerManager.isAllowed(walker, rod))
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
}
}
protected void validateSuppliedIntervals() {
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
if(!(walker instanceof ReadWalker)) {
@ -716,8 +689,6 @@ public class GenomeAnalysisEngine {
validateSuppliedReads();
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
for (ReadFilter filter : filters)
filter.initialize(this);
@ -926,9 +897,6 @@ public class GenomeAnalysisEngine {
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
// try and make the tracks given their requests
// create of live instances of the tracks
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
@ -939,7 +907,6 @@ public class GenomeAnalysisEngine {
flashbackData()));
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
validateSuppliedReferenceOrderedData(dataSources);
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
return dataSources;
@ -994,7 +961,7 @@ public class GenomeAnalysisEngine {
/**
* Get the list of intervals passed to the engine.
* @return List of intervals.
* @return List of intervals, or null if no intervals are in use
*/
public GenomeLocSortedSet getIntervals() {
return this.intervals;

View File

@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DescriptionTaglet;
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
import org.broadinstitute.sting.utils.help.SummaryTaglet;
import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*;
@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager<Walker> {
* @return A suitable display name for the package.
*/
public String getPackageDisplayName(String packageName) {
// Try to find an override for the display name of this package.
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME);
String displayName;
if(helpText.containsKey(displayNameKey)) {
displayName = helpText.getString(displayNameKey);
}
else {
// If no override exists...
// ...try to compute the override from the text of the package name, while accounting for
// unpackaged walkers.
displayName = packageName.substring(packageName.lastIndexOf('.')+1);
if(displayName.trim().equals("")) displayName = "<unpackaged>";
}
// ...try to compute the override from the text of the package name, while accounting for
// unpackaged walkers.
String displayName = packageName.substring(packageName.lastIndexOf('.')+1);
if (displayName.trim().equals("")) displayName = "<unpackaged>";
return displayName;
}
@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Package help text, or "" if none exists.
*/
public String getPackageSummaryText(String packageName) {
String key = String.format("%s.%s",packageName,SummaryTaglet.NAME);
String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(key))
return "";
return helpText.getString(key);
@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Walker summary description, or "" if none exists.
*/
public String getWalkerSummaryText(Class<? extends Walker> walkerType) {
String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME);
String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(walkerSummary))
return "";
return helpText.getString(walkerSummary);
@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Walker full description, or "" if none exists.
*/
public String getWalkerDescriptionText(Class<? extends Walker> walkerType) {
String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME);
String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME);
if(!helpText.containsKey(walkerDescription))
return "";
return helpText.getString(walkerDescription);
@ -188,19 +177,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return The list of allowed reference meta data.
*/
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
Allows allowsDataSource = getWalkerAllowed(walkerClass);
if (allowsDataSource == null)
return Collections.<RMD>emptyList();
return Arrays.asList(allowsDataSource.referenceMetaData());
}
/**
* Get a list of RODs allowed by the walker.
* @param walker Walker to query.
* @return The list of allowed reference meta data.
*/
public static List<RMD> getAllowsMetaData(Walker walker) {
return getAllowsMetaData(walker.getClass());
return Collections.<RMD>emptyList();
}
/**
@ -237,24 +214,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return True if the walker forbids this data type. False otherwise.
*/
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
Allows allowsDataSource = getWalkerAllowed(walkerClass);
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
if( allowsDataSource == null )
return true;
// The difference between unspecified RMD and the empty set of metadata can't be detected.
// Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
// case in the future.
if( allowsDataSource.referenceMetaData().length == 0 )
return true;
for( RMD allowed: allowsDataSource.referenceMetaData() ) {
if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
return true;
}
return false;
return true;
}
/**
@ -294,8 +254,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return The list of required reference meta data.
*/
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
Requires requiresDataSource = getWalkerRequirements(walkerClass);
return Arrays.asList(requiresDataSource.referenceMetaData());
return Collections.emptyList();
}
/**

View File

@ -23,8 +23,26 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reference;
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/**
* @author ebanks
* @version 1.0
*/
@Root
public class DbsnpArgumentCollection {
/**
* A dbSNP VCF file.
*/
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
public RodBinding<VariantContext> dbsnp;
public interface ReferenceDataSourceProgressListener {
public void percentProgress(int percent);
}

View File

@ -101,6 +101,8 @@ public class GATKArgumentCollection {
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@Deprecated
@Hidden
@ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>();
@ -117,11 +119,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false;
@Element(required = false)
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
public String DBSNPFile = null;
/**
* The override mechanism in the GATK, by default, populates the command-line arguments, then
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
@ -345,14 +342,6 @@ public class GATKArgumentCollection {
return false;
}
}
if (other.RODBindings.size() != RODBindings.size()) {
return false;
}
for (int x = 0; x < RODBindings.size(); x++) {
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
return false;
}
}
if (!other.samFiles.equals(this.samFiles)) {
return false;
}
@ -380,9 +369,6 @@ public class GATKArgumentCollection {
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
return false;
}
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
return false;
}
if (!other.unsafe.equals(this.unsafe)) {
return false;
}

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/**
 * Standard argument collection supplying the single required "variant" VCF input
 * (--variant / -V), intended for inclusion in walkers that consume one variant ROD.
 *
 * @author ebanks
 * @version 1.0
 */
@Root
public class StandardVariantContextInputArgumentCollection {

    /**
     * Variants from this VCF file are used by this tool as input.
     * The file must at least contain the standard VCF header lines, but
     * can be empty (i.e., no variants are contained in the file).
     */
    @Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
    public RodBinding<VariantContext> variants;
}

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList;
@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
* @param loc Locus at which to track.
* @return A tracker containing information about this locus.
*/
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size());
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
for ( ReferenceOrderedDataState state: states )
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) );
return tracks;
// todo -- warning, I removed the reference to the name from states
bindings.add( state.iterator.seekForward(loc) );
return new RefMetaDataTracker(bindings, referenceContext);
}
/**

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc;
public interface ReferenceOrderedView extends View {
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc );
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/
private RODMergingIterator rodQueue = null;
RefMetaDataTracker tracker = null;
Collection<RODRecordList> allTracksHere;
GenomeLoc lastLoc = null;
RODRecordList interval = null;
@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
}
rodQueue = new RODMergingIterator(iterators);
//throw new StingException("RodLocusView currently disabled");
}
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) {
return tracker;
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
// special case the interval again -- add it into the ROD
if ( interval != null ) { allTracksHere.add(interval); }
return new RefMetaDataTracker(allTracksHere, referenceContext);
}
public boolean hasNext() {
@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
// Update the tracker here for use
Collection<RODRecordList> allTracksHere = getSpanningTracks(datum);
tracker = createTracker(allTracksHere);
allTracksHere = getSpanningTracks(datum);
GenomeLoc rodSite = datum.getLocation();
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
}
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
for ( RODRecordList track : allTracksHere ) {
if ( ! t.hasROD(track.getName()) )
t.bind(track.getName(), track);
}
// special case the interval again -- add it into the ROD
if ( interval != null ) { t.bind(interval.getName(), interval); }
return t;
}
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
return rodQueue.allElementsLTE(marker);
}
@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return getSkippedBases(getLocOneBeyondShard());
}
public RefMetaDataTracker getTracker() {
return tracker;
}
/**
* Closes the current view.
*/
@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
state.dataSource.close( state.iterator );
rodQueue = null;
tracker = null;
allTracksHere = null;
}
}

View File

@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
//while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
// current = current.combine(parser,wrappedIterator.next());
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}

View File

@ -893,6 +893,7 @@ public class SAMDataSource {
* Custom representation of interval bounds.
* Makes it simpler to track current position.
*/
private int[] intervalContigIndices;
private int[] intervalStarts;
private int[] intervalEnds;
@ -917,12 +918,14 @@ public class SAMDataSource {
if(foundMappedIntervals) {
if(keepOnlyUnmappedReads)
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mixed of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
this.intervalContigIndices = new int[intervals.size()];
this.intervalStarts = new int[intervals.size()];
this.intervalEnds = new int[intervals.size()];
int i = 0;
for(GenomeLoc interval: intervals) {
intervalStarts[i] = (int)interval.getStart();
intervalEnds[i] = (int)interval.getStop();
intervalContigIndices[i] = interval.getContigIndex();
intervalStarts[i] = interval.getStart();
intervalEnds[i] = interval.getStop();
i++;
}
}
@ -961,11 +964,10 @@ public class SAMDataSource {
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
if(!keepOnlyUnmappedReads) {
// Mapped read filter; check against GenomeLoc-derived bounds.
if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] ||
(candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) {
// This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
if(readEndsOnOrAfterStartingBound(candidateRead)) {
// This read ends after the current interval begins.
// Promising, but this read must be checked against the ending bound.
if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) {
if(readStartsOnOrBeforeEndingBound(candidateRead)) {
// Yes, this read is within both bounds. This must be our next read.
nextRead = candidateRead;
break;
@ -993,6 +995,37 @@ public class SAMDataSource {
candidateRead = iterator.next();
}
}
/**
 * Tests whether the read's end lies on or after the start of the interval bound
 * currently under examination ({@code currentBound}).  For reads that are unmapped
 * but placed, the recorded alignment end is unreliable, so only the alignment
 * start is consulted in that case.
 *
 * @param read The read to position-check.
 * @return True if the read ends on or after the current starting bound; false otherwise.
 */
private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
    final int boundContig = intervalContigIndices[currentBound];
    final int readContig = read.getReferenceIndex();
    // Different contig: qualifies only if the read sits on a later contig.
    if (readContig != boundContig)
        return readContig > boundContig;
    // Same contig: qualifies if the alignment end reaches the bound's start...
    if (read.getAlignmentEnd() >= intervalStarts[currentBound])
        return true;
    // ...or if the read is unmapped but placed at or after the bound's start.
    return read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound];
}
/**
 * Tests whether the read's start lies on or before the end of the interval bound
 * currently under examination ({@code currentBound}).
 *
 * @param read The read to position-check.
 * @return True if the read starts on or before the current ending bound; false otherwise.
 */
private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
    final int boundContig = intervalContigIndices[currentBound];
    final int readContig = read.getReferenceIndex();
    // An earlier contig trivially precedes the bound's end.
    if (readContig < boundContig)
        return true;
    // Same contig: compare the alignment start against the bound's end point.
    return readContig == boundContig && read.getAlignmentStart() <= intervalEnds[currentBound];
}
}
/**

View File

@ -41,7 +41,7 @@ import java.io.File;
* Loads reference data from fasta file
* Looks for fai and dict files, and tries to create them if they don't exist
*/
public class ReferenceDataSource implements ReferenceDataSourceProgressListener {
public class ReferenceDataSource {
private IndexedFastaSequenceFile index;
/** our log, which we want to capture anything from this class */
@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
// get exclusive lock
if (!indexLock.exclusiveLock())
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this);
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
}
@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
public IndexedFastaSequenceFile getReference() {
return this.index;
}
/**
* Notify user of progress in creating fai file
* @param percent Percent of fasta file read as a percent
*/
public void percentProgress(int percent) {
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
}
}

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
}
public Class getType() {
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
}
public Class getRecordType() {
return builder.createCodec(getType(),getName()).getFeatureType();
return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
}
public File getFile() {

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
/**
* [Short one sentence description of this walker]
*
* <p>
* [Functionality of this walker]
* </p>
*
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
*
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
// Template walker illustrating gatkdocs documentation conventions; the traversal
// methods below are placeholder stubs that perform no real work.
public class GATKDocsExample extends RodWalker<Integer, Integer> {
    /**
     * Put detailed documentation about the argument here. No need to duplicate the summary information
     * in doc annotation field, as that will be added before this text in the documentation page.
     *
     * Notes:
     * <ul>
     * <li>This field can contain HTML as a normal javadoc</li>
     * <li>Don't include information about the default value, as gatkdocs adds this automatically</li>
     * <li>Try your best to describe in detail the behavior of the argument, as ultimately confusing
     * docs here will just result in user posts on the forum</li>
     * </ul>
     */
    @Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false)
    private boolean myWalkerArgument = false;
    /** Placeholder per-locus map: always produces 0. */
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }
    /** Placeholder reduce seed. */
    public Integer reduceInit() { return 0; }
    /** Placeholder reduce: sums the map results. */
    public Integer reduce(Integer value, Integer sum) { return value + sum; }
    /** Placeholder traversal-done hook: does nothing. */
    public void onTraversalDone(Integer result) { }
}

View File

@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
traversalEngine.startTimers();
ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker);

View File

@ -44,15 +44,16 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param shardStrategy A strategy for sharding the data.
*/
public Object execute(Walker walker, ShardStrategy shardStrategy) {
traversalEngine.startTimers();
walker.initialize();
Accumulator accumulator = Accumulator.create(engine,walker);
boolean done = walker.isDone();
int counter = 0;
for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) {
if ( shard == null ) // we ran out of shards that aren't owned
for (Shard shard : shardStrategy ) {
if ( done || shard == null ) // we ran out of shards that aren't owned
break;
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result);
dataProvider.close();
if ( walker.isDone() ) break;
}
windowMaker.close();
}
@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler {
accumulator.accumulate(dataProvider,result);
dataProvider.close();
}
done = walker.isDone();
}
Object result = accumulator.finishTraversal();

View File

@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.*;
import javax.management.JMException;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.lang.management.ManagementFactory;
import java.util.Collection;
@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
private final MBeanServer mBeanServer;
private final ObjectName mBeanName;
protected GenomeLocProcessingTracker processingTracker;
/**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the
* selected walker.
@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
if (engine.getArguments().processingTrackerFile != null) {
if ( walker instanceof ReadWalker )
throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers"));
}
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
if(walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
catch (JMException ex) {
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
}
//
// create the processing tracker
//
if ( engine.getArguments().processingTrackerFile != null ) {
logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably.");
if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) {
engine.getArguments().processingTrackerFile.delete();
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
}
PrintStream statusStream = null;
if ( engine.getArguments().processingTrackerStatusFile != null ) {
try {
statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
} catch ( FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
}
}
ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID);
processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ;
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID);
} else {
// create a NoOp version that doesn't do anything but say "yes"
processingTracker = new NoOpGenomeLocProcessingTracker();
}
}
/**

View File

@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
public Object call() {
try {
traversalEngine.startTimersIfNecessary();
long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit();

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out FailsVendorQualityCheck reads.
*/
public class FailsVendorQualityCheckReadFilter extends ReadFilter {
public class FailsVendorQualityCheckFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag();
}

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
* @version 0.1
*/
public class MappingQualityReadFilter extends ReadFilter {
public class MappingQualityFilter extends ReadFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 10;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
* @version 0.1
*/
public class MappingQualityUnavailableReadFilter extends ReadFilter {
public class MappingQualityUnavailableFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
}

View File

@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
* @version 0.1
*/
public class MappingQualityZeroReadFilter extends ReadFilter {
public class MappingQualityZeroFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0);
}

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out duplicate reads.
*/
public class NotPrimaryAlignmentReadFilter extends ReadFilter {
public class NotPrimaryAlignmentFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) {
return read.getNotPrimaryAlignmentFlag();
}

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {

View File

@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
/**
* A SamRecordFilter that also depends on the header.
*/
@DocumentedGATKFeature(
groupName = "Read filters",
summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" )
public abstract class ReadFilter implements SamRecordFilter {
/**
* Sets the header for use by this filter.

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
 * A read filter (transformer) that sets every read's mapping quality to a given value.
 *
 * <p>
 * If a BAM file contains erroneous or missing mapping qualities, this 'filter' will set
 * all mapping qualities to a given value. The default is 60.
 * </p>
 *
 *
 * <h2>Input</h2>
 * <p>
 * BAM file(s)
 * </p>
 *
 *
 * <h2>Output</h2>
 * <p>
 * BAM file(s) with all reads mapping qualities reassigned
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 * java
 * -jar GenomeAnalysisTK.jar
 * -rf ReassignMappingQuality
 * -DMQ 35
 * </pre>
 *
 * @author carneiro
 * @since 8/8/11
 */
public class ReassignMappingQualityFilter extends ReadFilter {
// Value written into every read's MAPQ field; overridable on the command line via -DMQ.
@Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
public int defaultMappingQuality = 60;
public boolean filterOut(SAMRecord rec) {
// Unconditionally overwrite the read's mapping quality with the configured value.
rec.setMappingQuality(defaultMappingQuality);
// Always false: no read is ever discarded — this "filter" only transforms reads in place.
return false;
}
}

View File

@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
writer.writeHeader(stub.getVCFHeader());
}
public void add(VariantContext vc, byte ref) {
writer.add(vc, ref);
public void add(VariantContext vc) {
writer.add(vc);
}
/**
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
for ( VariantContext vc : source.iterator() ) {
target.writer.add(vc, vc.getReferenceBaseForIndel());
target.writer.add(vc);
}
source.close();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.reflect.Constructor;
import java.lang.reflect.Type;
/**
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations.
@ -69,16 +70,21 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) {
public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
engine.addOutput(stub);
return createInstanceOfClass(type,stub);
return createInstanceOfClass((Class)type,stub);
}
@Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition definition = createDefaultArgumentDefinition(source);
String fileName = getArgumentValue( definition, matches );
@ -91,7 +97,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
engine.addOutput(stub);
Object result = createInstanceOfClass(type,stub);
Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
// WARNING: Side effects required by engine!
parsingEngine.addTags(result,getArgumentTags(matches));

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import java.io.File;
import java.lang.reflect.Type;
/**
* Describe how to parse SAMFileReaders.
@ -52,14 +53,13 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
this.engine = engine;
}
@Override
public boolean supports( Class type ) {
return SAMFileReader.class.isAssignableFrom(type);
}
@Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.List;
@ -93,7 +94,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
@ -102,7 +108,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
}
@Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
// Extract all possible parameters that could be passed to a BAM file writer?
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
String writerFileName = getArgumentValue( bamArgumentDefinition, matches );

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.OutputStream;
import java.lang.reflect.Type;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashSet;
@ -108,7 +109,12 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) {
public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
@ -124,7 +130,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @return Transform from the matches into the associated argument.
*/
@Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) {
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
vcfHeader = header;
// Check for the command-line argument header line. If not present, add it in.
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for(VCFHeaderLine line: vcfHeader.getMetaData()) {
if(line.getKey().equals(commandLineArgHeaderLine.getKey()))
foundCommandLineHeaderLine = true;
if ( !skipWritingHeader ) {
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
boolean foundCommandLineHeaderLine = false;
for (VCFHeaderLine line: vcfHeader.getMetaData()) {
if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
foundCommandLineHeaderLine = true;
}
if ( !foundCommandLineHeaderLine )
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
// also put in the reference contig header lines
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
}
if(!foundCommandLineHeaderLine && !skipWritingHeader)
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
outputTracker.getStorage(this).writeHeader(vcfHeader);
}
@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/**
* @{inheritDoc}
*/
public void add(VariantContext vc, byte ref) {
outputTracker.getStorage(this).add(vc,ref);
public void add(VariantContext vc) {
outputTracker.getStorage(this).add(vc);
}
/**
@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
}
/**
 * Builds a VCF "contig" meta-data header line describing a single reference sequence.
 *
 * @param contig   the reference sequence record to describe
 * @param assembly assembly name to embed in the line, or null to omit the assembly field
 * @return a header line of the form contig=&lt;ID=...,length=...[,assembly=...]&gt;
 */
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
    final String description = (assembly == null)
            ? String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength())
            : String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
    return new VCFHeaderLine("contig", description);
}
/**
 * Makes a best-effort guess at the reference assembly name from the reference file name,
 * for inclusion in the VCF header's contig lines.
 *
 * @param refPath name of the reference file (e.g. "human_g1k_v37.fasta"); may be null
 * @return the guessed assembly ("b37", "b36", "hg18", or "hg19"), or null if unrecognized
 */
private String getReferenceAssembly(String refPath) {
    // This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
    if ( refPath == null )
        return null;
    // Check b37 first: "v37" file names (e.g. human_g1k_v37) also denote the b37 assembly.
    if ( refPath.contains("b37") || refPath.contains("v37") )
        return "b37";
    if ( refPath.contains("b36") )
        return "b36";
    if ( refPath.contains("hg18") )
        return "hg18";
    if ( refPath.contains("hg19") )
        return "hg19";
    return null;
}
}

View File

@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle;
import java.io.*;
import java.net.InetAddress;
import java.security.NoSuchAlgorithmException;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
@ -154,9 +153,13 @@ public class GATKRunReport {
private long nReads;
public enum PhoneHomeOption {
/** Disable phone home */
NO_ET,
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
STANDARD,
/** Force output to STDOUT. For debugging only */
STDOUT,
/** Force output to S3. For debugging only */
AWS_S3 // todo -- remove me -- really just for testing purposes
}
@ -226,22 +229,6 @@ public class GATKRunReport {
}
/**
 * Helper utility that calls into the InetAddress system to resolve the canonical hostname
 * of the local machine. If resolution fails, the literal string "unresolvable" is returned instead.
 *
 * @return the canonical hostname of the local host, or "unresolvable" if it cannot be determined
 */
private String resolveHostname() {
try {
return InetAddress.getLocalHost().getCanonicalHostName();
}
catch (java.net.UnknownHostException uhe) { // the local host name could not be resolved into an address
return "unresolvable";
// deliberately swallowed: fall back to a fixed placeholder rather than failing the report
}
}
public void postReport(PhoneHomeOption type) {
logger.debug("Posting report of type " + type);
switch (type) {
@ -321,7 +308,7 @@ public class GATKRunReport {
private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
this.hostName = resolveHostname(); // we want to fill in the host name
this.hostName = Utils.resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file

View File

@ -1,238 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.lang.reflect.Constructor;
import java.util.Iterator;
import java.util.regex.Pattern;
/**
* This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
* files. The only assumption made is that every line in the file provides a complete and separate data record. The records
* can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
* position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
* otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
* provided that is capable of parsing itself from a single line of data. This implementation will be used,
* through reflection mechanism, as a callback to do all the work.
*
* The model is, hence, as follows:
*
* String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
* an instance of RodImplementation with data values from dataRecord line).
*
*
* instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
* that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
* representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
* track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
* multiple streams of (possibly heterogeneous) annotation data bound to an application.
*
* This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
* to the ROD implementation class for processing.
*
 * This iterator does not actually check if the ROD records (lines) in the file are indeed ordered by coordinate,
* and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
* iterators/wrappers will perform all the necessary checks.
*
* Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
* mechanism is as follows:
* 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
* and return the header object (to be kept by the iterator)
* 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
* and
* 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
* method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
* other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
*
* Created by IntelliJ IDEA.
* User: asivache
* Date: Sep 10, 2009
* Time: 1:22:23 PM
* To change this template use File | Settings | File Templates.
*/
public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
private PushbackIterator<String> reader;
// stores name of the track this iterator reads (will be also returned by getName() of ROD objects
// generated by this iterator)
private String name;
// we keep the file object, only to use file name in error reports
private File file;
// rod type; this is what we will instantiate for RODs at runtime
private Class<ROD> type;
private Object header = null; // Some RODs may use header
// field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
private String fieldDelimiter;
// constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
private Constructor<ROD> named_constructor;
// keep track of the lines we are reading. used for error messages only.
private long linenum = 0;
private boolean allow_empty = true;
private boolean allow_comments = true;
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
public RODRecordIterator(File file, String name, Class<ROD> type) {
try {
reader = new PushbackIterator<String>(new XReadLines(file));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(file, e);
}
this.file = file;
this.name = name;
this.type = type;
try {
named_constructor = type.getConstructor(String.class);
}
catch (java.lang.NoSuchMethodException e) {
throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
}
ROD rod = instantiateROD(name);
fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
try {
header = rod.initialize(file);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
}
}
/**
* Returns <tt>true</tt> if the iteration has more elements. (In other
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
* rather than throwing an exception.)
*
* @return <tt>true</tt> if the iterator has more elements.
*/
public boolean hasNext() {
if ( allow_empty || allow_comments ) {
while ( reader.hasNext() ) {
String line = reader.next();
if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
// the line is not empty and not a comment line, so we have next after all
reader.pushback(line);
return true;
}
return false; // oops, we end up here if there's nothing left
} else {
return reader.hasNext();
}
}
/**
* Returns the next valid ROD record in the file, skipping empty and comment lines.
*
* @return the next element in the iteration.
* @throws java.util.NoSuchElementException
* iteration has no more elements.
*/
public ROD next() {
ROD n = null;
boolean parsed_ok = false;
String line ;
while ( ! parsed_ok && reader.hasNext() ) {
line = reader.next();
linenum++;
while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
allow_comments && line.charAt(0) == '#' ) {
if ( reader.hasNext() ) {
line = reader.next();
linenum++;
} else {
line = null;
break;
}
}
if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
String parts[] = line.split(fieldDelimiter);
try {
n = instantiateROD(name);
parsed_ok = n.parseLine(header,parts) ;
}
catch ( Exception e ) {
throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
"\nOffending line: "+line+
"\nReason ("+e.getClass().getName()+")", e);
}
}
return n;
}
/**
* Removes from the underlying collection the last element returned by the
* iterator (optional operation). This method can be called only once per
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
* the underlying collection is modified while the iteration is in
* progress in any way other than by calling this method.
*
* @throws UnsupportedOperationException if the <tt>remove</tt>
* operation is not supported by this Iterator.
* @throws IllegalStateException if the <tt>next</tt> method has not
* yet been called, or the <tt>remove</tt> method has already
* been called after the last call to the <tt>next</tt>
* method.
*/
public void remove() {
throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
}
/** Instantiates appropriate implementation of the ROD used by this iterator. The 'name' argument is the name
* of the ROD track.
* @param name
* @return
*/
private ROD instantiateROD(final String name) {
try {
return (ROD) named_constructor.newInstance(name);
} catch (Exception e) {
throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
}
}
}

View File

@ -1,13 +1,15 @@
package org.broadinstitute.sting.gatk.refdata;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
@ -18,348 +20,402 @@ import java.util.*;
* The standard interaction model is:
*
* Traversal system arrives at a site, which has a bunch of RMDs covering it
Genotype * Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs
* Traversal passes tracker to the walker
* walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was
* bound at this site.
* Traversal passes creates a tracker and passes it to the walker
* walker calls get(rodBinding) to obtain the RMDs values at this site for the track
* associated with rodBinding.
*
* Note that this is an immutable class. Once created the underlying data structures
* cannot be modified
*
* User: mdepristo
* Date: Apr 3, 2009
* Time: 3:05:23 PM
*/
public class RefMetaDataTracker {
// TODO: this should be a list, not a map, actually
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
final Map<String, RODRecordList> map;
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
final ReferenceContext ref;
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
public RefMetaDataTracker(int nBindings) {
if ( nBindings == 0 )
// ------------------------------------------------------------------------------------------
//
//
// Special ENGINE interaction functions
//
//
// ------------------------------------------------------------------------------------------
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
this.ref = ref;
// set up the map
if ( allBindings.isEmpty() )
map = Collections.emptyMap();
else
map = new HashMap<String, RODRecordList>(nBindings);
else {
Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
for ( RODRecordList rod : allBindings ) {
if ( rod != null && ! rod.isEmpty() )
tmap.put(canonicalName(rod.getName()), rod);
}
// ensure that no one modifies the map itself
map = Collections.unmodifiableMap(tmap);
}
}
// ------------------------------------------------------------------------------------------
//
//
// Generic accessors
//
//
// ------------------------------------------------------------------------------------------
/**
* Gets all of the Tribble features spanning this locus, returning them as a list of specific
* type T extending Feature. This function looks across all tracks to find the Features, so
* if you have two tracks A and B each containing 1 Feature, then getValues will return
* a list containing both features.
*
* Note that this function assumes that all of the bound features are instances of or
* subclasses of T. A ClassCastException will occur if this isn't the case. If you want
* to get all Features without any danger of such an exception use the root Tribble
* interface Feature.
*
* @param type The type of the underlying objects bound here
* @param <T> as above
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Requires({"type != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Class<T> type) {
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
}
/**
* get all the reference meta data associated with a track name.
* @param name the name of the track we're looking for
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
* dbSNP RMD this will be a RodDbSNP, etc.
* Provides the same functionality as @link #getValues(Class<T>) but will only include
* Features that start as the GenomeLoc provide onlyAtThisLoc.
*
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
* @param type The type of the underlying objects bound here
* @param onlyAtThisLoc
* @param <T> as above
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
public List<Object> getReferenceMetaData(final String name) {
RODRecordList list = getTrackDataByName(name, true);
List<Object> objects = new ArrayList<Object>();
if (list == null) return objects;
for (GATKFeature feature : list)
objects.add(feature.getUnderlyingObject());
return objects;
@Requires({"type != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
}
/**
* get all the reference meta data associated with a track name.
* @param name the name of the track we're looking for
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
* the passed in parameter (false).
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
* dbSNP rod this will be a RodDbSNP, etc.
 * Uses the same logic as @link #getValues(Class) but arbitrarily selects one of the resulting
* elements of the list to return. That is, if there would be two elements in the result of
* @link #getValues(Class), one of these two is selected, and which one it will be isn't
* specified. Consequently, this method is only really safe if (1) you absolutely know
* that only one binding will meet the constraints of @link #getValues(Class) or (2)
* you truly don't care which of the multiple bindings available you are going to examine.
*
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
* If there are no bindings here, getFirstValue() return null
*
* @param type The type of the underlying objects bound here
* @param <T> as above
* @return A random single element the RODs bound here, or null if none are bound.
*/
public List<Object> getReferenceMetaData(final String name, boolean requireExactMatch) {
RODRecordList list = getTrackDataByName(name, requireExactMatch);
List<Object> objects = new ArrayList<Object>();
if (list == null) return objects;
for (GATKFeature feature : list)
objects.add(feature.getUnderlyingObject());
return objects;
@Requires({"type != null"})
public <T extends Feature> T getFirstValue(final Class<T> type) {
return safeGetFirst(getValues(type));
}
/**
* get all the GATK features associated with a specific track name
* @param name the name of the track we're looking for
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with
* the passed in parameter (false).
* @return a list of GATKFeatures for the target rmd
* Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list
* of eligible Features and @link #getFirstValue(Class) to select a single
* element from the interval list.
*
* Important: The list returned by this function is guaranteed not to be null, but may be empty!
* @param type The type of the underlying objects bound here
* @param <T> as above
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
*/
public List<GATKFeature> getGATKFeatureMetaData(final String name, boolean requireExactMatch) {
List<GATKFeature> feat = getTrackDataByName(name,requireExactMatch);
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null
@Requires({"type != null", "onlyAtThisLoc != null"})
public <T extends Feature> T getFirstValue(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
return safeGetFirst(getValues(type, onlyAtThisLoc));
}
/**
* get a singleton record, given the name and a type. This function will return the first record at the current position seen,
* and emit a logger warning if there were more than one option.
* Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
* a list of specific type T extending Feature.
*
* WARNING: this method is deprecated, since we now suppport more than one RMD at a single position for all tracks. If there are
* are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets
* picked may change from time to time! BE WARNED!
*
* @param name the name of the track
* @param clazz the underlying type to return
* @param <T> the type to parameterize on, matching the clazz argument
* @return a record of type T, or null if no record is present.
* Note that this function assumes that all of the bound features are instances of or
* subclasses of T. A ClassCastException will occur if this isn't the case.
*
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Deprecated
public <T> T lookup(final String name, Class<T> clazz) {
RODRecordList objects = getTrackDataByName(name, true);
@Requires({"rodBinding != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding) {
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), null, false, false);
}
// if emtpy or null return null;
if (objects == null || objects.size() < 1) return null;
/**
 * Gets all of the Tribble features bound to any RodBinding in rodBindings,
 * spanning this locus, returned as a list of a specific type T extending Feature.
 *
 * Note that this function assumes that all of the bound features are instances of or
 * subclasses of T. A ClassCastException will occur if this isn't the case.
 *
 * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
 * @param <T> The Tribble Feature type of the rodBindings, and consequently the type of the resulting list of Features
 * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
 */
@Requires({"rodBindings != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings) {
    final List<T> collected = new ArrayList<T>(1);
    for ( final RodBinding<T> binding : rodBindings ) {
        collected.addAll(getValues(binding));
    }
    return collected;
}
if (objects.size() > 1)
logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options");
/**
 * The same logic as {@link #getValues(RodBinding)} but additionally requires that
 * each returned Feature start exactly at onlyAtThisLoc.
 *
 * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
 * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
 * @param onlyAtThisLoc only Features starting at this site are considered
 * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
 */
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
    return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
}
Object obj = objects.get(0).getUnderlyingObject();
if (!(clazz.isAssignableFrom(obj.getClass())))
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString()
+ " it's of type " + obj.getClass());
/**
 * The same logic as {@link #getValues(Collection)} but additionally requires that
 * each returned Feature start exactly at onlyAtThisLoc.
 *
 * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
 * @param <T> The Tribble Feature type of the rodBindings, and consequently the type of the resulting list of Features
 * @param onlyAtThisLoc only Features starting at this site are considered
 * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
 */
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
    final List<T> collected = new ArrayList<T>(1);
    for ( final RodBinding<T> binding : rodBindings ) {
        collected.addAll(getValues(binding, onlyAtThisLoc));
    }
    return collected;
}
return (T)obj;
/**
 * Uses the same logic as {@link #getValues(RodBinding)} to determine the list
 * of eligible Features and selects a single element from the resulting set.
 *
 * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
 * @param <T> as above
 * @return a single element of the eligible Features found (no ordering guarantee), or null if none are bound
 */
@Requires({"rodBinding != null"})
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding) {
    return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
}
/**
 * Uses the same logic as {@link #getValues(RodBinding, GenomeLoc)} to determine the list
 * of eligible Features and selects a single element from the resulting set.
 *
 * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
 * @param <T> as above
 * @param onlyAtThisLoc only Features starting at this site are considered
 * @return a single element of the eligible Features found (no ordering guarantee), or null if none are bound
 */
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
    return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
}
/**
 * Applies {@link #getFirstValue(RodBinding)} to each binding in turn and returns
 * the first non-null result.
 *
 * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
 * @param <T> as above
 * @return a single eligible Feature, or null if none are bound
 */
@Requires({"rodBindings != null"})
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings) {
    for ( final RodBinding<T> binding : rodBindings ) {
        final T hit = getFirstValue(binding);
        if ( hit != null ) {
            return hit;
        }
    }
    return null;
}
/**
 * Applies {@link #getFirstValue(RodBinding, GenomeLoc)} to each binding in turn and
 * returns the first non-null result.
 *
 * @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
 * @param <T> as above
 * @param onlyAtThisLoc only Features starting at this site are considered
 * @return a single eligible Feature, or null if none are bound
 */
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
    for ( final RodBinding<T> binding : rodBindings ) {
        final T hit = getFirstValue(binding, onlyAtThisLoc);
        if ( hit != null ) {
            return hit;
        }
    }
    return null;
}
/**
 * Is there a binding at this site to a ROD/track with the specified name?
 *
 * @param name the name of the rod
 * @return true if any Features are bound in this tracker under that name
 */
public boolean hasROD(final String name) {
    return map.containsKey(canonicalName(name));
}
/**
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
* at the current site, they all will be added to the list as separate elements.
*
* @return collection of all rods
*/
public Collection<GATKFeature> getAllRods() {
List<GATKFeature> l = new ArrayList<GATKFeature>();
for ( RODRecordList rl : map.values() ) {
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map alltogether?
l.addAll(rl);
}
return l;
/**
 * Are any Features bound in this tracker to the track associated with rodBinding?
 *
 * @param rodBinding the rod binding we want to know about
 * @return true if any Features are bound in this tracker to rodBinding
 */
@Requires({"rodBinding != null"})
public boolean hasValues(final RodBinding rodBinding) {
    return map.containsKey(canonicalName(rodBinding.getName()));
}
/**
* Get all of the RMD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple RMD records associated with the current site.
*
* @return collection of all tracks
* @return List of all tracks
*/
public Collection<RODRecordList> getBoundRodTracks() {
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>();
for ( RODRecordList value : map.values() ) {
if ( value != null && value.size() != 0 ) bound.add(value);
}
return bound;
/**
 * Get all of the RMD tracks at the current site. Each track is returned as a single
 * compound object (RODRecordList) that may contain multiple RMD records for the site.
 *
 * @return a freshly allocated List of all bound tracks
 */
public List<RODRecordList> getBoundRodTracks() {
    return new ArrayList<RODRecordList>(map.values());
}
/**
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker
* The number of tracks with at least one value bound here
* @return the number of tracks with at least one bound Feature
*/
public int getNBoundRodTracks() {
return getNBoundRodTracks(null);
public int getNTracksWithBoundFeatures() {
return map.size();
}
public int getNBoundRodTracks(final String excludeIn ) {
final String exclude = excludeIn == null ? null : canonicalName(excludeIn);
// ------------------------------------------------------------------------------------------
//
//
// old style accessors
//
// TODO -- DELETE ME
//
//
// ------------------------------------------------------------------------------------------
int n = 0;
for ( RODRecordList value : map.values() ) {
if ( value != null && ! value.isEmpty() ) {
if ( exclude == null || ! value.getName().equals(exclude) )
n++;
}
}
return n;
/**
 * Is any track bound under this raw name at this site?
 * @deprecated use {@link #hasValues(RodBinding)} instead.
 */
@Deprecated
public boolean hasValues(final String name) {
    return map.containsKey(canonicalName(name));
}
/**
 * Gets all Features of type T bound under the raw track name at this site.
 * @deprecated use {@link #getValues(RodBinding)} instead.
 */
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
    return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
}
/**
 * Gets all Features of type T bound under the raw track name that start at onlyAtThisLoc.
 * @deprecated use {@link #getValues(RodBinding, GenomeLoc)} instead.
 */
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
    return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
}
/**
 * Gets the first Feature of type T bound under the raw track name, or null if none.
 * @deprecated use {@link #getFirstValue(RodBinding)} instead.
 */
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
    return safeGetFirst(getValues(type, name));
}
/**
 * Gets the first Feature of type T bound under the raw track name starting at onlyAtThisLoc, or null.
 * @deprecated use {@link #getFirstValue(RodBinding, GenomeLoc)} instead.
 */
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
    return safeGetFirst(getValues(type, name, onlyAtThisLoc));
}
// ------------------------------------------------------------------------------------------
//
//
// Private utility functions
//
//
// ------------------------------------------------------------------------------------------
/**
 * Binds the list of reference ordered data records (RMDs) to track name at this site.
 * Should be used only by the traversal system to provide access to RMDs in a
 * structured way to the walkers.
 *
 * @param name the name of the track
 * @param rod the collection of RMD data
 */
public void bind(final String name, RODRecordList rod) {
    //logger.debug(String.format("Binding %s to %s", name, rod));
    map.put(canonicalName(name), rod);
}
/**
 * Converts all possible ROD tracks to VariantContext objects, of all types, allowing any start
 * and any number of entries per ROD. The name of each VariantContext corresponds to the ROD name.
 *
 * @param ref reference context used for the conversion
 * @return all VariantContexts spanning the current site
 */
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
    return getAllVariantContexts(ref, null, null, false, false);
}
/**
* Returns all of the variant contexts that start at the current location
* @param ref
* @param curLocation
* @param l
* @param <T>
* @return
*/
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) {
return getAllVariantContexts(ref, null, curLocation, true, false);
/**
 * Helper function for the getFirstValue() operations: returns the first element
 * of l, or null if the list is empty.
 */
@Requires({"l != null"})
final private <T extends Feature> T safeGetFirst(final List<T> l) {
    return l.isEmpty() ? null : l.get(0);
}
/**
 * Converts all possible ROD tracks to VariantContext objects. If allowedTypes != null, then only
 * VariantContexts in the allowed set of types will be returned. If requireStartHere is true, then curLocation
 * must not be null, and only records whose start position is == to curLocation.getStart() will be returned.
 * If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course,
 * this single object must pass the allowed types and start here options if provided. Note that the result
 * may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
 * the current location.
 *
 * The name of each VariantContext corresponds to the ROD name.
 *
 * @param ref reference context
 * @param allowedTypes allowed VariantContext types; null means all types
 * @param curLocation location (may be null unless requireStartHere is true)
 * @param requireStartHere do we require the rod to start at this location?
 * @param takeFirstOnly do we take the first rod only?
 * @return all matching VariantContexts spanning the current site
 */
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
    List<VariantContext> contexts = new ArrayList<VariantContext>();
    // delegate per-track conversion/filtering to addVariantContexts
    for ( RODRecordList rodList : getBoundRodTracks() ) {
        addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
    }
    return contexts;
}
/**
 * Gets the variant contexts associated with the single track named name.
 *
 * see getAllVariantContexts for the meaning of the filtering arguments.
 *
 * @param ref ReferenceContext to enable conversion to variant context
 * @param name track name
 * @param allowedTypes allowed types; null means all types
 * @param curLocation location
 * @param requireStartHere do we require the rod to start at this location?
 * @param takeFirstOnly do we take the first rod only?
 * @return variant contexts for the named track
 */
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
    return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
// public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
// }
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
private <T extends Feature> List<T> addValues(final Collection<String> names,
final Class<T> type,
List<T> values,
final GenomeLoc curLocation,
final boolean requireStartHere,
final boolean takeFirstOnly ) {
for ( String name : names ) {
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match
if ( rodList != null )
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
if ( takeFirstOnly && ! values.isEmpty() )
break;
}
return contexts;
}
/**
 * Like getVariantContexts, but a track is selected when its name starts with one of
 * the given names (prefix match) rather than requiring an exact name match.
 *
 * see getAllVariantContexts for the meaning of the filtering arguments.
 */
public Collection<VariantContext> getVariantContextsByPrefix(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
    Collection<VariantContext> contexts = new ArrayList<VariantContext>();
    for ( String name : names ) {
        RODRecordList rodList = getTrackDataByName(name,false); // false => allow prefix matches of the track name
        if ( rodList != null )
            addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
    }
    return contexts;
}
/**
 * Gets the variant context associated with name, assuming the system has at most one
 * bound record for that track at this location; throws if more than one is present.
 *
 * see getAllVariantContexts for more information.
 *
 * @param ref reference context used for conversion
 * @param name track name
 * @param allowedTypes allowed types; null means all types
 * @param curLocation location
 * @param requireStartHere do we require the rod to start at this location?
 * @return the single VariantContext, or null if none is bound
 */
public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
    final Collection<VariantContext> hits = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
    if ( hits.isEmpty() )
        return null;
    if ( hits.size() > 1 )
        throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
    return hits.iterator().next();
}
/**
* Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
* null if there's no binding here.
*
* @param ref
* @param name
* @param curLocation
* @return
*/
public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
return getVariantContext(ref, name, null, curLocation, true);
return values;
}
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
private <T extends Feature> List<T> addValues(final String name,
final Class<T> type,
List<T> values,
final RODRecordList rodList,
final GenomeLoc curLocation,
final boolean requireStartHere,
final boolean takeFirstOnly ) {
for ( GATKFeature rec : rodList ) {
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) {
// ok, we might actually be able to turn this record in a variant context
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref);
if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
Object obj = rec.getUnderlyingObject();
if (!(type.isAssignableFrom(obj.getClass())))
throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
+ " it's of type " + obj.getClass());
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted
continue;
T objT = (T)obj;
if ( takeFirstOnly ) {
if ( values == null )
values = Arrays.asList(objT);
else
values.add(objT);
// now, let's decide if we want to keep it
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType());
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart();
if ( goodType && goodPos ) { // ok, we are going to keep this thing
contexts.add(vc);
if ( takeFirstOnly )
// we only want the first passing instance, so break the loop over records in rodList
break;
break;
} else {
if ( values == null )
values = new ArrayList<T>();
values.add(objT);
}
}
}
return values == null ? Collections.<T>emptyList() : values;
}
/**
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated
* with the current site as a RODRecordList collection object. If no data track with specified name is available,
* with the current site as a RODRecordList List object. If no data track with specified name is available,
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
@ -367,29 +423,16 @@ public class RefMetaDataTracker {
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location).
* @param name track name
* @param requireExactMatch do we require an exact match of the rod name?
* @return track data for the given rod
*/
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) {
//logger.debug(String.format("Lookup %s%n", name));
private RODRecordList getTrackDataByName(final String name) {
final String luName = canonicalName(name);
RODRecordList trackData = null;
RODRecordList l = map.get(luName);
return l == null ? EMPTY_ROD_RECORD_LIST : l;
}
if ( requireExactMatch ) {
if ( map.containsKey(luName) )
trackData = map.get(luName);
} else {
for ( Map.Entry<String, RODRecordList> datum : map.entrySet() ) {
final String rodName = datum.getKey();
if ( datum.getValue() != null && rodName.startsWith(luName) ) {
if ( trackData == null ) trackData = new RODRecordListImpl(name);
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
((RODRecordListImpl)trackData).add(datum.getValue(), true);
}
}
}
return trackData;
/**
 * Fetches the track data bound under this binding's track name.
 * Never null — an empty record list is returned when nothing is bound
 * (see getTrackDataByName(String)).
 *
 * @param binding the rod binding whose track data we want
 * @return the RODRecordList for the binding's track
 */
private RODRecordList getTrackDataByName(final RodBinding binding) {
    return getTrackDataByName(binding.getName());
}
/**
@ -398,6 +441,7 @@ public class RefMetaDataTracker {
* @return canonical name of the rod
*/
/**
 * Returns the canonical (lower-cased) form of a track name, used as the map key
 * so lookups are case-insensitive.
 * NOTE(review): uses the default locale's lower-casing; assumed safe for ASCII track names — confirm.
 *
 * @param name name of the rod
 * @return canonical name of the rod
 */
private final String canonicalName(final String name) {
    // todo -- remove me after switch to RodBinding syntax
    return name.toLowerCase();
}
}

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Class for representing arbitrary reference ordered data sets
 * <p/>
 * User: mdepristo
 * Date: Feb 27, 2009
 * Time: 10:47:14 AM
 */
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ReferenceOrderedDatum> {
    /** The track name of this ROD. */
    private String name;

    /** The file backing this ROD, if any. */
    private File file = null;

    /** Runtime type information for object construction. */
    private Class<ROD> type = null;

    /** our log, which we want to capture anything from this class */
    private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);

    /**
     * given an existing file, open it and append all the valid triplet lines to an existing list
     *
     * Lines that don't look like a comma-separated triplet are logged and skipped.
     *
     * @param rodTripletList the list of existing triplets; valid lines are appended to it
     * @param filename the file to attempt to extract ROD triplets from
     * @throws UserException.CouldNotReadInputFile if the file is missing or unreadable
     */
    protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
        final File inputFile = new File(filename);

        BufferedReader str;
        try {
            str = new BufferedReader(new FileReader(inputFile));
        } catch (FileNotFoundException e) {
            throw new UserException.CouldNotReadInputFile(inputFile, "Unable to load the ROD input file", e);
        }

        String line = "NO LINES READ IN";
        try {
            while ((line = str.readLine()) != null) {
                if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
                else logger.warn("the following file line didn't parse into a triplet -> " + line);
            }
        } catch (IOException e) {
            throw new UserException.CouldNotReadInputFile(inputFile, "Failed reading the input rod file; last line read was " + line, e);
        } finally {
            // fix: the reader was previously leaked; always release the file handle
            try {
                str.close();
            } catch (IOException e) {
                logger.warn("Unable to close ROD input file " + filename);
            }
        }
    }

    // ----------------------------------------------------------------------
    //
    // Constructors
    //
    // ----------------------------------------------------------------------
    public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
        this.name = name;
        this.file = file;
        this.type = type;
    }

    public String getName() { return name; }
    public File getFile() { return file; }
    public Class<ROD> getType() { return type; }

    /**
     * Special equals override to see if this ROD is compatible with the given
     * name and type. 'Compatible' means that this ROD has the name that's passed
     * in and its data can fit into the container specified by type.
     *
     * @param name Name to check.
     * @param type Type to check.
     *
     * @return True if these parameters imply this rod. False otherwise.
     */
    public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
        return this.name.equals(name) && type.isAssignableFrom(this.type);
    }

    /**
     * Returns an iterator over this ROD's records.
     *
     * If the concrete ROD type declares a static createIterator(String, File) factory
     * method, it is invoked reflectively; otherwise a generic RODRecordIterator is used.
     * Reflection failures other than a missing method are rethrown as RuntimeExceptions.
     */
    public Iterator<ReferenceOrderedDatum> iterator() {
        Iterator<ReferenceOrderedDatum> it;
        try {
            Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
            it = (Iterator<ReferenceOrderedDatum>) m.invoke(null, name, file);
        } catch (java.lang.NoSuchMethodException e) {
            // no custom factory declared; fall back to the generic record iterator
            it = new RODRecordIterator(file,name,type);
        } catch (java.lang.NullPointerException e) {
            throw new RuntimeException(e);
        } catch (java.lang.SecurityException e) {
            throw new RuntimeException(e);
        } catch (java.lang.IllegalAccessException e) {
            throw new RuntimeException(e);
        } catch (java.lang.IllegalArgumentException e) {
            throw new RuntimeException(e);
        } catch (java.lang.reflect.InvocationTargetException e) {
            throw new RuntimeException(e);
        }
        return it;
    }

    // ----------------------------------------------------------------------
    //
    // Manipulations of all of the data
    //
    // ----------------------------------------------------------------------

    /**
     * Writes each record's repl() representation to output, one per line.
     * fix: the writer is now closed even if writing a record throws.
     */
    public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
        final FileWriter out = new FileWriter(output);
        try {
            for (ReferenceOrderedDatum rec : data) {
                out.write(rec.repl() + "\n");
            }
        } finally {
            out.close();
        }
    }
}

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import java.io.File;
/**
 * An interface marking that a given Tribble codec can look at a file and determine whether
 * this codec specifically can parse the contents of that file.
 */
public interface SelfScopingFeatureCodec {
    /**
     * This function returns true iff the File potentialInput can be parsed by this
     * codec.
     *
     * The GATK assumes that there's never a situation where two SelfScopingFeatureCodecs
     * return true for the same file. If this occurs the GATK emits an error.
     *
     * Note this function must never throw an error. All errors should be trapped
     * and false returned.
     *
     * @param potentialInput the file to test for parsability with this codec
     * @return true if potentialInput can be parsed, false otherwise
     */
    public boolean canDecode(final File potentialInput);
}

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil;
import org.broad.tribble.Feature;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.dbsnp.OldDbSNPFeature;
import org.broad.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature;
import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -92,28 +93,89 @@ public class VariantContextAdaptors {
// --------------------------------------------------------------------------------------------------------------
private static class DBSnpAdaptor implements VCAdaptor {
/** True iff the dbSNP record describes a single-base, exact-location variant. */
private static boolean isSNP(OldDbSNPFeature feature) {
    return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
}
/** True iff the dbSNP record describes a multi-nucleotide polymorphism over a range. */
private static boolean isMNP(OldDbSNPFeature feature) {
    return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
}
/** True iff the dbSNP record describes an insertion. */
private static boolean isInsertion(OldDbSNPFeature feature) {
    return feature.getVariantType().contains("insertion");
}
/** True iff the dbSNP record describes a deletion. */
private static boolean isDeletion(OldDbSNPFeature feature) {
    return feature.getVariantType().contains("deletion");
}
/** True iff the dbSNP record describes any insertion, deletion, or complex in-del. */
private static boolean isIndel(OldDbSNPFeature feature) {
    return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
}
/** True iff the dbSNP record describes a complex in-del. */
public static boolean isComplexIndel(OldDbSNPFeature feature) {
    return feature.getVariantType().contains("in-del");
}
/**
 * gets the alternate alleles. This method should return all the alleles present at the location,
 * NOT including the reference base. This is returned as a string list with no guarantee ordering
 * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
 * frequency).
 *
 * @return an alternate allele list
 */
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
    final String refAllele = String.valueOf(feature.getNCBIRefBase());
    final List<String> alternates = new ArrayList<String>();
    for ( final String allele : getAlleleList(feature) ) {
        if ( ! allele.equals(refAllele) ) {
            alternates.add(allele);
        }
    }
    return alternates;
}
/**
 * gets the alleles. This method should return all the alleles present at the location,
 * including the reference base. The first allele should always be the reference allele, followed
 * by an unordered list of alternate alleles.
 *
 * @return an allele list with the reference allele first when present
 */
public static List<String> getAlleleList(OldDbSNPFeature feature) {
    List<String> alleleList = new ArrayList<String>();
    // add ref first
    if ( feature.getStrand() == Strand.POSITIVE )
        alleleList = Arrays.asList(feature.getObserved());
    else
        // negative strand: store the reverse complement of each observed allele
        for (String str : feature.getObserved())
            alleleList.add(SequenceUtil.reverseComplement(str));
    // if the reference allele is present but not first, swap it to the front
    if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
        && !alleleList.get(0).equals(feature.getNCBIRefBase()) )
        Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
    return alleleList;
}
/**
* Converts non-VCF formatted dbSNP records to VariantContext.
* @return DbSNPFeature.
* @return OldDbSNPFeature.
*/
@Override
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; }
public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
@Override
public VariantContext convert(String name, Object input, ReferenceContext ref) {
DbSNPFeature dbsnp = (DbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) )
OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null;
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true);
Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || DbSNPHelper.isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele
List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele);
// add all of the alt alleles
boolean sawNullAllele = false;
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) {
boolean sawNullAllele = refAllele.isNull();
for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null;
@ -127,14 +189,13 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( sawNullAllele ) {
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
}
Collection<Genotype> genotypes = null;
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
Map<String, Genotype> genotypes = null;
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
return vc;
} else
return null; // can't handle anything else
@ -164,16 +225,6 @@ public class VariantContextAdaptors {
@Override
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
@ -237,17 +288,7 @@ public class VariantContextAdaptors {
* @return HapMapFeature.
*/
@Override
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
public Class<? extends Feature> getAdaptableFeatureType() { return RawHapMapFeature.class; }
/**
* convert to a Variant Context, given:
@ -261,7 +302,12 @@ public class VariantContextAdaptors {
if ( ref == null )
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
HapMapFeature hapmap = (HapMapFeature)input;
RawHapMapFeature hapmap = (RawHapMapFeature)input;
int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -271,7 +317,7 @@ public class VariantContextAdaptors {
// use the actual alleles, if available
if ( alleleMap != null ) {
alleles.addAll(alleleMap.values());
Allele deletionAllele = alleleMap.get(HapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
Allele deletionAllele = alleleMap.get(RawHapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
if ( deletionAllele != null && deletionAllele.isReference() )
deletionLength = deletionAllele.length();
} else {
@ -321,7 +367,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd();
if ( deletionLength > 0 )
end += deletionLength;
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs);
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
return vc;
}
}

View File

@ -1,193 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
/**
 * Tribble codec for the tab-delimited "annotator input table" format: a header line of
 * column names followed by one record per line.  The first column either encodes the full
 * location ("chr:start-stop") or the location is split across the first three columns
 * (chr, start, stop); the remaining columns are free-form values keyed by the header.
 * Locations are resolved with an externally supplied GenomeLocParser, hence
 * ReferenceDependentFeatureCodec.
 */
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
// column separator used by both the header line and every data line
public static final String DELIMITER = "\t";
// column names parsed from the file header; populated by readHeader(LineReader)
// and required before decode(String) is called
private ArrayList<String> header;
/**
 * The parser to use when resolving genome-wide locations.
 */
private GenomeLocParser genomeLocParser;
/**
 * Set the parser to use when resolving genetic data.
 * @param genomeLocParser The supplied parser.
 */
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
/**
 * Parses the header and caches the column names for later decode() calls.
 *
 * @param reader line reader positioned at the start of the file
 * @return the list of column names parsed from the header line
 * @throws IllegalArgumentException if the underlying reader cannot be read
 */
public Object readHeader(LineReader reader)
{
int[] lineCounter = new int[1];
try {
header = readHeader(reader, lineCounter);
} catch(IOException e) {
throw new IllegalArgumentException("Unable to read from file.", e);
}
return header;
}
/**
 * @return the Feature class produced by this codec.
 */
public Class<AnnotatorInputTableFeature> getFeatureType() {
return AnnotatorInputTableFeature.class;
}
/**
 * Fast-path decode of only the genomic location of a line (no column values).
 */
@Override
public Feature decodeLoc(String line) {
StringTokenizer st = new StringTokenizer(line, DELIMITER);
if ( st.countTokens() < 1 )
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
GenomeLoc loc;
String chr = st.nextToken();
// a first column of the form "chr:start-stop" encodes the full location by itself;
// otherwise the position is split across the first three columns (chr, start, stop)
if ( chr.indexOf(":") != -1 ) {
loc = genomeLocParser.parseGenomeLoc(chr);
} else {
// NOTE(review): this requires 3 remaining tokens after chr although only
// start and stop are read below -- confirm whether 2 was intended
if ( st.countTokens() < 3 )
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
}
return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop());
}
/**
 * Parses the line into an AnnotatorInputTableFeature object, copying every column
 * value into the feature keyed by the header's column names.  Requires that
 * readHeader(LineReader) has already run.
 *
 * @param line one data line of the table
 * @throws CodecLineParsingException if the line's column count differs from the header's
 */
public AnnotatorInputTableFeature decode(String line) {
final ArrayList<String> header = this.header; //optimization
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
if ( values.size() != header.size()) {
throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
}
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
for ( int i = 0; i < header.size(); i++ ) {
feature.putColumnValue(header.get(i), values.get(i));
}
GenomeLoc loc;
// same two location encodings as decodeLoc: "chr:start-stop" in column 0,
// or (chr, start, stop) in columns 0-2
if ( values.get(0).indexOf(":") != -1 )
loc = genomeLocParser.parseGenomeLoc(values.get(0));
else
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
//parse the location
feature.setChr(loc.getContig());
feature.setStart((int)loc.getStart());
feature.setEnd((int)loc.getStop());
return feature;
}
/**
 * Returns the header fields of the given file, opening and closing it internally.
 * @param source the table file to read
 * @return the header fields
 * @throws IOException
 */
public static ArrayList<String> readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {
return readHeader(new AsciiLineReader(is), null);
} finally {
is.close();
}
}
/**
 * Returns the header, and also sets the 2nd arg to the number of lines in the header.
 * The header is the first line that is neither blank nor a '#' comment.
 * @param source
 * @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
 * @return The header fields.
 * @throws IOException
 * @throws IllegalArgumentException if every line is blank or a comment
 */
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
ArrayList<String> header = null;
int numLines = 0;
//find the 1st line that's non-empty and not a comment
String line = null;
while( (line = source.readLine()) != null ) {
numLines++;
if ( line.trim().isEmpty() || line.startsWith("#") ) {
continue;
}
//parse the header
header = Utils.split(line, DELIMITER);
break;
}
// check that we found the header
if ( header == null ) {
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
}
if(lineCounter != null) {
lineCounter[0] = numLines;
}
logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
return header;
}
}

View File

@ -1,158 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import org.broad.tribble.Feature;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
 * This class represents a single record in an AnnotatorInputTable: a genomic
 * interval (chr, start, end) plus free-form column values keyed by the column
 * names parsed from the file header.
 */
public class AnnotatorInputTableFeature implements Feature {

    private ArrayList<String> columnNames;
    private HashMap<String, String> columnValues;   // maps column names to column values
    private String chr;
    private int start;
    private int end;
    private String strRep = null;                   // cached result of toString(), built lazily

    /**
     * Constructor for a location-only feature (no column data yet).
     * @param chr The chromosome name.
     * @param start The start position
     * @param end The end position
     */
    public AnnotatorInputTableFeature(String chr, int start, int end) {
        // Initialize the column containers so the accessors and toString() are
        // safe to call on a location-only feature.  (Previously these fields
        // were left null by this constructor, so getColumnValue(),
        // containsColumnName(), getColumnValues() and toString() all threw NPE.)
        this.columnNames = new ArrayList<String>();
        this.columnValues = new HashMap<String, String>();
        this.chr = chr;
        this.start = start;
        this.end = end;
    }

    /**
     * Constructor.
     * @param columnNames The column names as parsed out of the file header.
     */
    public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
        this.columnNames = columnNames;
        this.columnValues = new HashMap<String, String>();
    }

    /**
     * @return the list of column names from the file header (empty for a
     *         location-only feature).
     */
    public ArrayList<String> getHeader() {
        return columnNames;
    }

    /**
     * Returns the value of the given column.
     *
     * @param columnName The column name as it appears in the file header.
     * @return The value, or null if the column is unknown
     */
    public String getColumnValue(final String columnName) {
        return columnValues.get(columnName);
    }

    /**
     * @param columnName The column name as it appears in the file header.
     * @return whether a value has been stored for this column
     */
    public boolean containsColumnName(final String columnName) {
        return columnValues.containsKey(columnName);
    }

    /**
     * Sets the value for the given column.
     *
     * @param columnName The column name as it appears in the file header.
     * @param value The value
     * @return The existing value associated with the columnName, if there is one.
     */
    protected String putColumnValue(final String columnName, final String value) {
        return columnValues.put(columnName, value);
    }

    /**
     * @return all values in this line, hashed by their column names.  The
     *         returned map is an unmodifiable view.
     */
    public Map<String, String> getColumnValues() {
        return Collections.unmodifiableMap(columnValues);
    }

    public String getChr() {
        return chr;
    }

    public int getStart() {
        return start;
    }

    public int getEnd() {
        return end;
    }

    protected void setChr(String chr) {
        this.chr = chr;
    }

    protected void setStart(int start) {
        this.start = start;
    }

    protected void setEnd(int end) {
        this.end = end;
    }

    @Override
    public String toString() {
        if ( strRep == null ) {
            // "[name1=value1, name2=value2]"; previously an empty column list
            // produced the malformed string "]" because the opening bracket was
            // only appended together with the first column.
            StringBuilder sb = new StringBuilder("[");
            boolean first = true;
            for ( String columnName : columnNames ) {
                if ( !first )
                    sb.append(", ");
                first = false;
                sb.append(columnName).append("=").append(columnValues.get(columnName));
            }
            sb.append("]");
            strRep = sb.toString();
        }
        return strRep;
    }
}

View File

@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
import java.util.Map;
/**
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
// find the types available to the track builders
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes();
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
// check that the type is valid
if (!typeMapping.containsKey(inputFileType))
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet()));
if (descriptor == null)
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
// create the codec
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType));
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
// check if it's a reference dependent feature codec
if (codec instanceof ReferenceDependentFeatureCodec)

View File

@ -0,0 +1,255 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import javax.mail.Header;
import java.io.File;
import java.util.*;
/**
 * Class for managing Tribble Feature readers available to the GATK. The features
 * are dynamically determined via a PluginManager. This class provides convenient
 * getter methods for obtaining FeatureDescriptor objects that collect all of the
 * useful information about the Tribble Codec, Feature, and name in one place.
 *
 * @author depristo
 */
public class FeatureManager {
/**
 * Value class binding together a track name (e.g. "VCF") and a prototype of the
 * FeatureCodec that parses it.  Ordered by name so descriptors can be kept in
 * sorted sets.
 */
public static class FeatureDescriptor implements Comparable<FeatureDescriptor> {
final String name;
final FeatureCodec codec;
public FeatureDescriptor(final String name, final FeatureCodec codec) {
this.name = name;
this.codec = codec;
}
/** @return the (upper-cased) track name this descriptor was registered under */
public String getName() {
return name;
}
/** @return the simple (unqualified) class name of the Feature type the codec emits */
public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
/** @return the prototype codec instance discovered by the plugin manager */
public FeatureCodec getCodec() {
return codec;
}
/** @return the concrete FeatureCodec class */
public Class getCodecClass() { return codec.getClass(); }
/** @return the Feature class produced by the codec */
public Class getFeatureClass() { return codec.getFeatureType(); }
@Override
public String toString() {
return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
getName(), getCodecClass().getName(), getFeatureClass().getName());
}
// NOTE(review): compareTo is defined but equals/hashCode are not overridden;
// this is fine for the TreeSet used below but is inconsistent with equals --
// confirm before storing descriptors in hash-based collections.
@Override
public int compareTo(FeatureDescriptor o) {
return getName().compareTo(o.getName());
}
}
// discovers FeatureCodec implementations on the classpath and instantiates them
private final PluginManager<FeatureCodec> pluginManager;
// all discovered descriptors, kept sorted by name (see FeatureDescriptor.compareTo)
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
/**
 * Construct a FeatureManager: discover every FeatureCodec plugin and register
 * one descriptor per codec under the upper-cased plugin name.
 */
public FeatureManager() {
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
FeatureCodec codec = pluginManager.createByName(rawName);
// track names are treated case-insensitively; canonicalize to upper case
String name = rawName.toUpperCase();
FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
featureDescriptors.add(featureDescriptor);
}
}
/**
 * Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
 *
 * @param codecClass
 * @return A FeatureDescriptor or null if none is found
 */
@Requires("codecClass != null")
public FeatureDescriptor getByCodec(Class codecClass) {
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getCodecClass().equals(codecClass) )
return descriptor;
return null;
}
/**
 * Returns a collection of FeatureDescriptors that emit records of type featureClass
 * (i.e. whose feature class is featureClass or a subtype of it).
 *
 * @param featureClass
 * @return the (possibly empty) set of matching descriptors, sorted by name
 */
@Requires("featureClass != null")
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
Set<FeatureDescriptor> consistentDescriptors = new TreeSet<FeatureDescriptor>();
// runtime guard duplicating the @Requires contract, kept for builds without contracts enabled
if (featureClass == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for ( FeatureDescriptor descriptor : featureDescriptors ) {
if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
consistentDescriptors.add(descriptor);
}
return consistentDescriptors;
}
/**
 * Return the FeatureDescriptor with getName().equals(name), compared case-insensitively.
 *
 * @param name
 * @return A FeatureDescriptor or null if none is found
 */
@Requires("name != null")
public FeatureDescriptor getByName(String name) {
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getName().equalsIgnoreCase(name) )
return descriptor;
return null;
}
/**
 * Returns the FeatureDescriptor that can read the contents of File file, if one can be
 * determined.  Only codecs implementing SelfScopingFeatureCodec can volunteer for a file.
 *
 * @param file
 * @return A FeatureDescriptor or null if none is found
 * @throws ReviewedStingException if more than one codec claims to decode the file
 */
@Requires({"file != null", "file.isFile()", "file.canRead()"})
public FeatureDescriptor getByFiletype(File file) {
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
canParse.add(descriptor);
}
}
if ( canParse.size() == 0 )
return null;
else if ( canParse.size() > 1 )
throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
else
return canParse.get(0);
}
/**
 * Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
 * @param triplet
 * @return the descriptor registered under the triplet's type name, or null
 */
@Requires("triplet != null")
public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
return getByName(triplet.getType());
}
/**
 * @return all of the FeatureDescriptors available to the GATK, as an unmodifiable view. Never null
 */
@Ensures("result != null")
public Collection<FeatureDescriptor> getFeatureDescriptors() {
return Collections.unmodifiableCollection(featureDescriptors);
}
/**
 * Returns a human-readable table of the available tribble track names (vcf,dbsnp,etc) that we can load
 * @return a formatted, multi-line table of name / feature type / documentation link
 */
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures() {
return userFriendlyListOfAvailableFeatures(Feature.class);
}
/**
 * Returns a human-readable table of the available tribble track names (vcf,dbsnp,etc) that we can load,
 * restricted to only Codecs producing Features consistent with the requiredFeatureType
 * @return a formatted, multi-line table of name / feature type / documentation link
 */
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation";
// first pass: compute column widths over the descriptors that will be printed
int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length();
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
maxNameLen = Math.max(maxNameLen, descriptor.getName().length());
maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length());
}
}
// second pass: emit the header row followed by one aligned row per matching descriptor
StringBuilder docs = new StringBuilder();
String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n";
docs.append(String.format(format, nameHeader, featureHeader, docHeader));
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
String oneDoc = String.format(format,
descriptor.getName(),
descriptor.getSimpleFeatureName(),
GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()));
docs.append(oneDoc);
}
}
return docs.toString();
}
/**
 * Create a new FeatureCodec of the type described in descriptor, assigning it the
 * name (if possible) and providing it the genomeLocParser (where necessary)
 *
 * @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
 * @param name the name to assign this codec
 * @return the feature codec itself
 */
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
@Ensures("result != null")
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
// a fresh instance each call -- the descriptor's codec is only a prototype
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
}
}

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.IOException;
/**
 * @author aaron
 * <p/>
 * Interface QueryableTrack
 * <p/>
 * A decorator interface for reference-metadata tracks that support random-access
 * queries over genomic intervals.
 */
public interface QueryableTrack {
/**
 * Query for the features associated with the given interval.
 * @param interval the genomic interval to query
 * @return a closeable iterator over the matching features
 * @throws IOException if the underlying reader fails
 */
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
/**
 * Query for the features associated with the given interval.
 * @param interval the genomic interval to query
 * @param contained whether features must be fully contained in the interval
 *                  (vs. merely overlapping it)
 * @return a closeable iterator over the matching features
 * @throws IOException if the underlying reader fails
 */
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
/**
 * Query for the features associated with the given contig coordinates.
 * @param contig the contig name
 * @param start the start position (inclusive)
 * @param stop the stop position (inclusive)
 * @return a closeable iterator over the matching features
 * @throws IOException if the underlying reader fails
 */
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
/**
 * Query for the features associated with the given contig coordinates.
 * @param contig the contig name
 * @param start the start position (inclusive)
 * @param stop the stop position (inclusive)
 * @param contained whether features must be fully contained in the interval
 *                  (vs. merely overlapping it)
 * @return a closeable iterator over the matching features
 * @throws IOException if the underlying reader fails
 */
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
/** Release any resources held by the underlying reader. */
public void close();
}

View File

@ -25,8 +25,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -45,10 +49,10 @@ import java.io.IOException;
* the basics of what a reference metadata track must contain.
*/
public class RMDTrack {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
// the basics of a track:
private final Class type; // our type
private final Class recordType; // the underlying records that are produced by this track
private final String name; // the name
private final File file; // the associated file we create the reader from
@ -90,7 +94,6 @@ public class RMDTrack {
*/
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
this.type = type;
this.recordType = codec.getFeatureType();
this.name = name;
this.file = file;
this.reader = reader;
@ -112,19 +115,10 @@ public class RMDTrack {
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
CloseableTribbleIterator<Feature> iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop());
if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE )
logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog());
return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName());
}
public void close() {

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2010 The Broad Institute
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
@ -12,37 +12,36 @@
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks.builders;
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.*;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -53,7 +52,10 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.*;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@ -67,17 +69,16 @@ import java.util.*;
* that gets iterators from the FeatureReader using Tribble.
*
*/
public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
/**
* our log, which we use to capture anything from this class
*/
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
private Map<String, Class> classes = null;
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
@ -91,6 +92,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
*/
private ValidationExclusion.TYPE validationExclusionType;
FeatureManager featureManager;
/**
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
@ -102,29 +105,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
public RMDTrackBuilder(SAMSequenceDictionary dict,
GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) {
super(FeatureCodec.class, "Codecs", "Codec");
this.dict = dict;
this.genomeLocParser = genomeLocParser;
this.validationExclusionType = validationExclusionType;
classes = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
classes.put(name.toUpperCase(), getPluginsByName().get(name));
} }
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return Collections.unmodifiableMap(classes);
this.genomeLocParser = genomeLocParser;
featureManager = new FeatureManager();
}
/** @return a list of all available track record types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() {
HashMap classToRecord = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
FeatureCodec codec = this.createByName(name);
classToRecord.put(name, codec.getFeatureType());
}
return classToRecord;
public FeatureManager getFeatureManager() {
return featureManager;
}
/**
@ -133,45 +121,38 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
* @param fileDescriptor a description of the type of track to build.
*
* @return an instance of the track
* @throws RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException {
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
String name = fileDescriptor.getName();
File inputFile = new File(fileDescriptor.getFile());
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase());
if (featureCodecClass == null)
FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
if (descriptor == null)
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
// return a feature reader track
Pair<FeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile);
pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
else
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType());
pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name));
return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
}
/**
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
* @param targetClass Type of Tribble class to build.
* @param codecClass Type of Tribble codec class to build.
* @param inputFile Input file type to use.
* @return An RMDTrack, suitable for accessing reference metadata.
*/
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) {
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics.
String typeName = null;
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
if(trackType.getValue().equals(targetClass))
typeName = trackType.getKey();
}
public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if(typeName == null)
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName());
if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
}
/**
@ -179,16 +160,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
* reader of the appropriate type will figure out what the right index type is, and determine if it
* exists.
*
* @param targetClass the codec class type
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the track
* @param inputFile the file to load
* @return a feature reader implementation
*/
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) {
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try {
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null);
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
} catch (TribbleException e) {
throw new UserException(e.getMessage(), e);
}
@ -196,28 +177,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
/**
* add a name to the codec, if it takes one
* @param targetClass the class to create a codec for
* @param descriptor the class to create a codec for
* @param name the name to assign this codec
* @return the feature codec itself
*/
public FeatureCodec createCodec(Class targetClass, String name) {
FeatureCodec codex = this.createByType(targetClass);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if(codex instanceof ReferenceDependentFeatureCodec)
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
return featureManager.createCodec(descriptor, name, genomeLocParser);
}
/**
* create a feature source object given:
* @param targetClass the target class
* @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the codec
* @param inputFile the tribble file to parse
* @param storageType How the RMD is streamed into the input file.
* @return the input file as a FeatureReader
*/
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) {
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
String name,
File inputFile,
RMDStorageType storageType) {
// Feature source and sequence dictionary to use as the ultimate reference
FeatureSource featureSource = null;
SAMSequenceDictionary sequenceDictionary = null;
@ -227,7 +206,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
if(canBeIndexed) {
try {
Index index = loadIndex(inputFile, createCodec(targetClass, name));
Index index = loadIndex(inputFile, createCodec(descriptor, name));
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { }
@ -240,7 +219,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
sequenceDictionary = getSequenceDictionaryFromProperties(index);
}
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name));
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
else
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
}
catch (TribbleException e) {
throw new UserException(e.getMessage());
@ -250,7 +232,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
}
}
else {
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false);
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
}
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
@ -385,22 +367,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
return idx;
}
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
Set<String> names = new TreeSet<String>();
if (trackRecordType == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
 * Exception thrown when the engine fails to create a reference metadata (RMD) track.
 *
 * @author aaron
 */
public class RMDTrackCreationException extends ReviewedStingException {
    /**
     * @param msg a description of why track creation failed
     */
    public RMDTrackCreationException(String msg) {
        super(msg);
    }

    /**
     * @param message a description of why track creation failed
     * @param throwable the underlying cause of the failure
     */
    public RMDTrackCreationException(String message, Throwable throwable) {
        super(message, throwable);
    }
}

View File

@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
public abstract GenomeLoc getLocation();
// TODO: this should be a Feature
public abstract Object getUnderlyingObject();
/**
@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
return feature.getEnd();
}
// TODO: this should be a Feature, actually
public Object getUnderlyingObject() {
return feature;
}
}
/**
 * Wraps an old-style ReferenceOrderedDatum (ROD) record as a new-style GATKFeature.
 */
public static class RODGATKFeature extends GATKFeature {
    // our data: the wrapped ROD record that backs every accessor below
    private ReferenceOrderedDatum datum;

    /**
     * @param datum the ROD record to adapt; its name becomes this feature's name
     */
    public RODGATKFeature(ReferenceOrderedDatum datum) {
        super(datum.getName());
        this.datum = datum;
    }

    @Override
    public GenomeLoc getLocation() {
        return datum.getLocation();
    }

    /** @return the raw wrapped ROD object */
    @Override
    public Object getUnderlyingObject() {
        return datum;
    }

    @Override
    public String getChr() {
        return datum.getLocation().getContig();
    }

    @Override
    public int getStart() {
        // GenomeLoc coordinates are longs; narrowing to int is assumed safe here -- TODO confirm
        return (int)datum.getLocation().getStart();
    }

    @Override
    public int getEnd() {
        return (int)datum.getLocation().getStop();
    }
}
}

View File

@ -1,65 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import java.util.Iterator;
/**
 * Adapts an iterator of old-style ReferenceOrderedDatum records into a
 * CloseableIterator of GATKFeature objects.
 *
 * @author aaron
 */
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
    /** The underlying ROD iterator being adapted. */
    private final Iterator<ReferenceOrderedDatum> rodIterator;

    /**
     * @param iter the ROD iterator to wrap
     */
    public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
        this.rodIterator = iter;
    }

    @Override
    public boolean hasNext() {
        return rodIterator.hasNext();
    }

    @Override
    public GATKFeature next() {
        // wrap each ROD record on the fly as it is pulled from the source iterator
        return new GATKFeature.RODGATKFeature(rodIterator.next());
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException("Remove not supported");
    }

    @Override
    public void close() {
        // the wrapped iterator holds no closeable resources, so there is nothing to do
    }
}

View File

@ -1,190 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
import net.sf.samtools.util.SequenceUtil;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
 * This class contains static helper methods for working with dbSNP features.
 */
public class DbSNPHelper {
    /** The conventional track name used to bind dbSNP data. */
    public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";

    private DbSNPHelper() {} // don't make a DbSNPHelper

    /**
     * Returns the first element of the list that is a DbSNPFeature representing a true SNP.
     *
     * @param dbsnpList a list of objects that may contain DbSNPFeatures; may be null
     * @return the first real SNP in the list, or null if the list is null or contains none
     */
    public static DbSNPFeature getFirstRealSNP(List<Object> dbsnpList) {
        if (dbsnpList == null)
            return null;

        DbSNPFeature dbsnp = null;
        for (Object d : dbsnpList) {
            if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) {
                dbsnp = (DbSNPFeature) d;
                break;
            }
        }

        return dbsnp;
    }

    /**
     * Returns the rsID of the first SNP in the list, accepting either DbSNPFeature
     * or VariantContext entries.
     *
     * @param featureList the features to scan; may be null
     * @return the rsID (DbSNPFeature) or ID (VariantContext) of the first SNP, or null if none
     */
    public static String rsIDOfFirstRealSNP(List<Object> featureList) {
        if (featureList == null)
            return null;

        String rsID = null;
        for ( Object d : featureList ) {
            if ( d instanceof DbSNPFeature ) {
                if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) {
                    rsID = ((DbSNPFeature)d).getRsID();
                    break;
                }
            } else if ( d instanceof VariantContext) {
                if ( ((VariantContext)d).isSNP() ) {
                    rsID = ((VariantContext)d).getID();
                    break;
                }
            }
        }

        return rsID;
    }

    /**
     * Returns the rsID of the first indel in the list, accepting either DbSNPFeature
     * or VariantContext entries.
     *
     * @param featureList the features to scan; may be null
     * @return the rsID (DbSNPFeature) or ID (VariantContext) of the first indel, or null if none
     */
    public static String rsIDOfFirstRealIndel(List<Object> featureList) {
        if (featureList == null)
            return null;

        String rsID = null;
        for ( Object d : featureList ) {
            if ( d instanceof DbSNPFeature ) {
                if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) {
                    rsID = ((DbSNPFeature)d).getRsID();
                    break;
                }
            } else if ( d instanceof VariantContext) {
                if ( ((VariantContext)d).isIndel() ) {
                    rsID = ((VariantContext)d).getID();
                    break;
                }
            }
        }

        return rsID;
    }

    /**
     * get the -1 * (log 10 of the error value)
     *
     * @param feature the dbSNP feature (currently unused; every entry gets the same estimate)
     * @return the log based error estimate (a fixed 4, i.e. -log10(0.0001))
     */
    public static double getNegLog10PError(DbSNPFeature feature) {
        return 4; // -log10(0.0001)
    }

    // ----------------------------------------------------------------------
    //
    // What kind of variant are we?
    //
    // ----------------------------------------------------------------------

    /** @return true if the feature is a single-base variant at an exact location (a SNP) */
    public static boolean isSNP(DbSNPFeature feature) {
        return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
    }

    /** @return true if the feature is a multi-nucleotide polymorphism spanning a range */
    public static boolean isMNP(DbSNPFeature feature) {
        return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
    }

    /**
     * Formats a medium-verbosity description: chr:start:rsID:observedAlleles, followed
     * by tags for each variant class this feature belongs to.
     *
     * @param feature the feature to describe
     * @return a human-readable summary string
     */
    public static String toMediumString(DbSNPFeature feature) {
        String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved()));
        if (isSNP(feature)) s += ":SNP";
        if (isIndel(feature)) s += ":Indel";
        if (isHapmap(feature)) s += ":Hapmap";
        if (is2Hit2Allele(feature)) s += ":2Hit";
        return s;
    }

    /** @return true if the dbSNP variant type string marks this as an insertion */
    public static boolean isInsertion(DbSNPFeature feature) {
        return feature.getVariantType().contains("insertion");
    }

    /** @return true if the dbSNP variant type string marks this as a deletion */
    public static boolean isDeletion(DbSNPFeature feature) {
        return feature.getVariantType().contains("deletion");
    }

    /** @return true if the feature is any kind of indel: insertion, deletion, or complex in-del */
    public static boolean isIndel(DbSNPFeature feature) {
        return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature);
    }

    /** @return true if the dbSNP variant type string marks this as a complex in-del */
    public static boolean isComplexIndel(DbSNPFeature feature) {
        return feature.getVariantType().contains("in-del");
    }

    /** @return true if the record is validated by HapMap */
    public static boolean isHapmap(DbSNPFeature feature) {
        return feature.getValidationStatus().contains("by-hapmap");
    }

    /** @return true if the record is validated by the 2-hit, 2-allele criterion */
    public static boolean is2Hit2Allele(DbSNPFeature feature) {
        return feature.getValidationStatus().contains("by-2hit-2allele");
    }

    /** @return true if the record is validated by the 1000 Genomes project */
    public static boolean is1000genomes(DbSNPFeature feature) {
        return feature.getValidationStatus().contains("by-1000genomes");
    }

    /** @return true if the record's mapping weight is exactly 1 (uniquely mapped) */
    public static boolean isMQ1(DbSNPFeature feature) {
        return feature.getWeight() == 1;
    }

    /**
     * gets the alternate alleles. This method should return all the alleles present at the location,
     * NOT including the reference base. This is returned as a string list with no guarantee ordering
     * of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
     * frequency).
     *
     * @param feature the feature to extract alternate alleles from
     * @return an alternate allele list
     */
    public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
        List<String> ret = new ArrayList<String>();
        // keep every allele that does not match the NCBI reference base
        for (String allele : getAlleleList(feature))
            if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
        return ret;
    }

    /** @return true if the record is reported on the forward (positive) strand */
    public static boolean onFwdStrand(DbSNPFeature feature) {
        return feature.getStrand() == Strand.POSITIVE;
    }

    /** @return the NCBI reference base(s) for this record */
    public static String getReference(DbSNPFeature feature) {
        return feature.getNCBIRefBase();
    }

    /**
     * Formats a minimal description of the record: rsID:observed:strand.
     *
     * NOTE(review): getObserved() appears to return a String[] (see getAlleleList), so the
     * middle %s formats the array's default toString; Utils.join as used in toMediumString
     * may have been intended -- confirm before relying on this output format.
     */
    public static String toSimpleString(DbSNPFeature feature) {
        return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-");
    }

    /**
     * gets the alleles. This method should return all the alleles present at the location,
     * including the reference base. The first allele should always be the reference allele, followed
     * by an unordered list of alternate alleles.
     *
     * @param feature the feature to extract alleles from
     * @return the allele list, reverse-complemented when the record is on the negative strand
     */
    public static List<String> getAlleleList(DbSNPFeature feature) {
        List<String> alleleList = new ArrayList<String>();
        // add ref first
        if ( onFwdStrand(feature) )
            alleleList = Arrays.asList(feature.getObserved());
        else
            for (String str : feature.getObserved())
                alleleList.add(SequenceUtil.reverseComplement(str));
        // move the reference allele to the front if it is present but not already first
        if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) )
            Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0);

        return alleleList;
    }
}

View File

@ -1,21 +1,25 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.*;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
/**
* Container class for GATK report tables
*/
public class GATKReport {
private TreeMap<String, GATKReportTable> tables;
public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/**
* Create a new, empty GATKReport.
*/
public GATKReport() {
tables = new TreeMap<String, GATKReportTable>();
}
/**
@ -23,7 +27,7 @@ public class GATKReport {
* @param filename the path to the file to load
*/
public GATKReport(String filename) {
loadReport(new File(filename));
this(new File(filename));
}
/**
@ -31,7 +35,6 @@ public class GATKReport {
* @param file the file to load
*/
public GATKReport(File file) {
tables = new TreeMap<String, GATKReportTable>();
loadReport(file);
}
@ -46,11 +49,17 @@ public class GATKReport {
GATKReportTable table = null;
String[] header = null;
int id = 0;
GATKReportVersion version = null;
List<Integer> columnStarts = null;
String line;
while ( (line = reader.readLine()) != null ) {
if (line.startsWith("##:GATKReport.v0.1 ")) {
line = line.replaceFirst("##:GATKReport.v0.1 ", "");
if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
version = GATKReportVersion.fromHeader(line);
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
String[] pieces = line.split(" : ");
String tableName = pieces[0];
@ -58,14 +67,35 @@ public class GATKReport {
addTable(tableName, tableDesc);
table = getTable(tableName);
table.setVersion(version);
header = null;
} else if ( line.isEmpty() ) {
columnStarts = null;
} else if ( line.trim().isEmpty() ) {
// do nothing
} else {
if (table != null) {
String[] splitLine;
switch (version) {
case V0_1:
splitLine = TextFormattingUtils.splitWhiteSpace(line);
break;
case V0_2:
if (header == null) {
columnStarts = TextFormattingUtils.getWordStarts(line);
}
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
break;
default:
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
}
if (header == null) {
header = line.split("\\s+");
header = splitLine;
table.addPrimaryKey("id", false);
@ -75,10 +105,8 @@ public class GATKReport {
id = 0;
} else {
String[] entries = line.split("\\s+");
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
table.set(id, header[columnIndex], entries[columnIndex]);
table.set(id, header[columnIndex], splitLine[columnIndex]);
}
id++;
@ -125,7 +153,10 @@ public class GATKReport {
* @return the table object
*/
public GATKReportTable getTable(String tableName) {
return tables.get(tableName);
GATKReportTable table = tables.get(tableName);
if (table == null)
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
return table;
}
/**
@ -140,4 +171,8 @@ public class GATKReport {
}
}
}
/**
 * @return all tables in this report; iteration order follows the backing TreeMap,
 *         i.e. sorted by table name
 */
public Collection<GATKReportTable> getTables() {
    return tables.values();
}
}

View File

@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
* values) in the table.
*
* @param primaryKey the primary key position in the column that should be set
* @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set
*/
public Object getWithoutSideEffects(Object primaryKey) {
private Object getWithoutSideEffects(Object primaryKey) {
if (!this.containsKey(primaryKey)) {
return defaultValue;
}
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return this.get(primaryKey);
}
/**
 * Return the string form of an element from the column; if the element is not set,
 * the formatted default value is returned instead.
 *
 * @param primaryKey the primary key position in the column that should be retrieved
 * @return the string value at the specified position in the column, or the default value if the element is not set
 */
public String getStringValue(Object primaryKey) {
    return toString(getWithoutSideEffects(primaryKey));
}
/**
* Return the displayable property of the column. If true, the column will be displayed in the final output.
* If not, printing will be suppressed for the contents of the table.
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
for (Object obj : this.values()) {
if (obj != null) {
int width = obj.toString().length();
int width = toString(obj).length();
if (width > maxWidth) {
maxWidth = width;
@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return maxWidth;
}
/**
 * Returns the display string for a cell value: null renders as "null", Float and
 * Double render with eight decimal places, and everything else uses its toString().
 *
 * @param obj The object to convert to a string
 * @return The string representation of the column value
 */
private static String toString(Object obj) {
    if (obj == null) {
        return "null";
    }
    if (obj instanceof Float || obj instanceof Double) {
        // %.8f accepts either boxed floating-point type directly
        return String.format("%.8f", obj);
    }
    return obj.toString();
}
/**
 * @return the name of this column
 */
public String getColumnName() {
    return columnName;
}
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import java.util.*;
/**
 * Tracks a linked list of GATKReportColumn in order by name, supporting both
 * name-based and index-based lookup.
 */
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
    // Parallel list of column names preserving insertion order for index-based lookup.
    // Invariant: contains each present key exactly once, in the order first inserted.
    private final List<String> columnNames = new ArrayList<String>();

    /**
     * Returns the column by index
     * @param i the index
     * @return The column
     */
    public GATKReportColumn getByIndex(int i) {
        return get(columnNames.get(i));
    }

    @Override
    public GATKReportColumn remove(Object key) {
        columnNames.remove(key);
        return super.remove(key);
    }

    @Override
    public GATKReportColumn put(String key, GATKReportColumn value) {
        // Only record the name for genuinely new keys: LinkedHashMap.put on an
        // existing key replaces the value without changing its order, so appending
        // unconditionally would leave duplicate entries in columnNames and corrupt
        // getByIndex lookups after an overwrite.
        if (!containsKey(key)) {
            columnNames.add(key);
        }
        return super.put(key, value);
    }
}

View File

@ -1,83 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
/**
 * Parses GATKReport (v0.1) formatted text into a collection of table parsers,
 * allowing individual cell values to be looked up by table name, key, and column.
 */
public class GATKReportParser {
    /** Parsed tables, in the order they appeared in the input. */
    private final List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();

    /**
     * Parses the given report file, closing the underlying stream when done.
     *
     * @param file the report file to read
     * @throws IOException if the file cannot be opened or read
     */
    public void parse(File file) throws IOException {
        InputStream stream = FileUtils.openInputStream(file);
        try {
            parse(stream);
        } finally {
            IOUtils.closeQuietly(stream);
        }
    }

    /**
     * Parses report text from a stream. A header line beginning with the v0.1
     * marker starts a new table; a blank line ends the current one.
     *
     * @param input the stream to read report lines from
     * @throws IOException if reading fails
     */
    public void parse(InputStream input) throws IOException {
        GATKReportTableParser activeTable = null;
        for (String line : new XReadLines(input)) {
            if (line.startsWith("##:GATKReport.v0.1 ")) {
                activeTable = newTableParser(line);
                tables.add(activeTable);
                activeTable.parse(line);
            } else if (activeTable != null) {
                if (line.trim().length() == 0) {
                    activeTable = null;
                } else {
                    activeTable.parse(line);
                }
            }
        }
    }

    /**
     * Looks up a cell value by multi-part key.
     *
     * @param tableName the table to search
     * @param key the multi-part primary key of the row
     * @param column the column name
     * @return the value, or null if no table with that name exists
     */
    public String getValue(String tableName, String[] key, String column) {
        GATKReportTableParser table = findTable(tableName);
        return table == null ? null : table.getValue(key, column);
    }

    /**
     * Looks up a cell value by single key.
     *
     * @param tableName the table to search
     * @param key the primary key of the row
     * @param column the column name
     * @return the value, or null if no table with that name exists
     */
    public String getValue(String tableName, String key, String column) {
        GATKReportTableParser table = findTable(tableName);
        return table == null ? null : table.getValue(key, column);
    }

    /** @return the first parsed table with the given name, or null if absent */
    private GATKReportTableParser findTable(String tableName) {
        for (GATKReportTableParser candidate : tables) {
            if (candidate.getTableName().equals(tableName)) {
                return candidate;
            }
        }
        return null;
    }

    /** Factory hook for creating a table parser from its header line. */
    private GATKReportTableParser newTableParser(String header) {
        return new GATKReportTableParser();
    }
}

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.ObjectUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream;
@ -88,17 +89,22 @@ import java.util.regex.Pattern;
* but at least the prototype contained herein works.
*
* @author Kiran Garimella
* @author Khalid Shakir
*/
public class GATKReportTable {
/** REGEX that matches any table with an invalid name */
public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
private String tableName;
private String tableDescription;
private GATKReportVersion version = LATEST_REPORT_VERSION;
private String primaryKeyName;
private Collection<Object> primaryKeyColumn;
private boolean primaryKeyDisplay;
boolean sortByPrimaryKey = true;
private boolean sortByPrimaryKey = true;
private LinkedHashMap<String, GATKReportColumn> columns;
private GATKReportColumns columns;
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
@ -107,12 +113,25 @@ public class GATKReportTable {
* @return true if the name is valid, false if otherwise
*/
private boolean isValidName(String name) {
Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]");
Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
Matcher m = p.matcher(name);
return !m.find();
}
/**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
*
* @param description the name of the table or column
* @return true if the name is valid, false if otherwise
*/
private boolean isValidDescription(String description) {
Pattern p = Pattern.compile("\\r|\\n");
Matcher m = p.matcher(description);
return !m.find();
}
/**
* Construct a new GATK report table with the specified name and description
*
@ -128,11 +147,23 @@ public class GATKReportTable {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
}
if (!isValidDescription(tableDescription)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
}
this.tableName = tableName;
this.tableDescription = tableDescription;
this.sortByPrimaryKey = sortByPrimaryKey;
columns = new LinkedHashMap<String, GATKReportColumn>();
columns = new GATKReportColumns();
}
public GATKReportVersion getVersion() {
return version;
}
protected void setVersion(GATKReportVersion version) {
this.version = version;
}
/**
@ -161,6 +192,57 @@ public class GATKReportTable {
primaryKeyDisplay = display;
}
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or throws an exception.
*/
public Object getPrimaryKey(String dottedColumnValues) {
Object key = findPrimaryKey(dottedColumnValues);
if (key == null)
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
return key;
}
/**
* Returns true if there is at least on row with the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
* @param dottedColumnValues Period concatenated values.
* @return true if there is at least one row matching the columns.
*/
public boolean containsPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues) != null;
}
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or null.
*/
private Object findPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues.split("\\."));
}
/**
* Returns the first primary key matching the column values.
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
* @param columnValues column values.
* @return The first primary key matching the column values.
*/
private Object findPrimaryKey(Object[] columnValues) {
for (Object primaryKey : primaryKeyColumn) {
boolean matching = true;
for (int i = 0; matching && i < columnValues.length; i++) {
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
}
if (matching)
return primaryKey;
}
return null;
}
/**
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
*
@ -230,6 +312,17 @@ public class GATKReportTable {
return columns.get(columnName).get(primaryKey);
}
/**
* Get a value from the given position in the table
*
* @param primaryKey the primary key value
* @param columnIndex the index of the column
* @return the value stored at the specified position in the table
*/
private Object get(Object primaryKey, int columnIndex) {
return columns.getByIndex(columnIndex).get(primaryKey);
}
/**
* Increment an element in the table. This implementation is awful - a functor would probably be better.
*
@ -515,7 +608,7 @@ public class GATKReportTable {
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription);
out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column
boolean needsPadding = false;
@ -545,22 +638,8 @@ public class GATKReportTable {
for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) {
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
if (needsPadding) { out.printf(" "); }
String value = "null";
if (obj != null) {
if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
}
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
String value = columns.get(columnName).getStringValue(primaryKey);
out.printf(columnWidths.get(columnName), value);
needsPadding = true;
@ -577,4 +656,16 @@ public class GATKReportTable {
public int getNumRows() {
return primaryKeyColumn.size();
}
public String getTableName() {
return tableName;
}
public String getTableDescription() {
return tableDescription;
}
public GATKReportColumns getColumns() {
return columns;
}
}

View File

@ -1,75 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.StringUtils;
import java.util.*;
/**
 * Parses a single GATK report table, line by line: the first line is the table
 * definition, the second the column headers, and all subsequent lines are data
 * rows. Supports value lookup by row-key prefix and column name.
 */
public class GATKReportTableParser {
    // Number of lines fed to parse() so far; determines how each line is interpreted.
    private int lineNum = 0;
    // Tokens of the table-definition line, e.g. ["##:GATKReport.v0.1", "TableName", ":", "description"].
    private String[] descriptions;
    // Column name -> column index, taken from the header line.
    private Map<String, Integer> headers = new HashMap<String, Integer>();
    // Data rows, one array of whitespace-separated cell values per row.
    private List<String[]> values = new ArrayList<String[]>();

    /**
     * Consumes the next line of the table.
     *
     * @param line the raw report line
     */
    public void parse(String line) {
        lineNum++;
        switch (lineNum) {
            case 1:
                descriptions = parseLine(line);
                break; // BUGFIX: was missing, so the definition line also overwrote headers and was stored as a data row
            case 2:
                String[] columnHeaders = parseLine(line);
                for (int i = 0; i < columnHeaders.length; i++)
                    headers.put(columnHeaders[i], i);
                break; // BUGFIX: was missing, so the header line was also stored as a data row
            default:
                values.add(parseLine(line));
                break;
        }
    }

    /**
     * @return the table name, i.e. the second token of the definition line
     */
    public String getTableName() {
        return descriptions[1];
    }

    /**
     * Looks up a cell by row key and column name. A row matches when its cells
     * starting at index 1 equal the given key values.
     *
     * @param key    the row key values (compared against row cells 1..key.length)
     * @param column the column name
     * @return the cell value in the first matching row, or null if the column
     *         is unknown or no row matches
     */
    public String getValue(String[] key, String column) {
        if (!headers.containsKey(column))
            return null;
        for (String[] row : values)
            if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
                return row[headers.get(column)];
        return null;
    }

    /**
     * Looks up a cell by a period-delimited row key, e.g. "dbsnp.eval.called".
     *
     * @param key    the dotted row key
     * @param column the column name
     * @return the cell value, or null if not found
     */
    public String getValue(String key, String column) {
        return getValue(key.split("\\."), column);
    }

    /**
     * Joins the first {@code i} tokens of a row with periods.
     * (Replaces the former commons-lang StringUtils.join call with a
     * dependency-free equivalent; currently unused by this class.)
     */
    private String generateKey(String[] row, int i) {
        StringBuilder key = new StringBuilder();
        for (int j = 0; j < i; j++) {
            if (j > 0)
                key.append('.');
            key.append(row[j]);
        }
        return key.toString();
    }

    // Splits a report line into whitespace-separated tokens.
    private String[] parseLine(String line) {
        return line.split(" +");
    }
}

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
 * Known GATK report file-format versions, keyed by the version token that
 * appears in the report header line ("##:GATKReport.v[version] ...").
 */
public enum GATKReportVersion {
    /**
     * Differences between other versions:
     * - Does not allow spaces in cells.
     * - Mostly fixed width but has a bug where the string width of floating point
     *   values was not measured correctly leading to columns that aren't aligned
     */
    V0_1("v0.1"),

    /**
     * Differences between other versions:
     * - Spaces allowed in cells, for example in sample names with spaces in them ex: "C507/FG-CR 6".
     * - Fixed width fixed for floating point values
     */
    V0_2("v0.2");

    /** The literal version token as written in report headers, e.g. "v0.1". */
    public final String versionString;

    private GATKReportVersion(String versionString) {
        this.versionString = versionString;
    }

    @Override
    public String toString() {
        return versionString;
    }

    /**
     * Returns the GATK Report Version from the file header.
     *
     * @param header Header from the file starting with ##:GATKReport.v[version]
     * @return The version as an enum.
     * @throws ReviewedStingException if the header matches no known version
     */
    public static GATKReportVersion fromHeader(String header) {
        // Derive each expected prefix from the constant's own version token so
        // that adding a new enum constant automatically makes it parseable,
        // instead of hard-coding one startsWith() check per version.
        for (GATKReportVersion version : values()) {
            if (header.startsWith("##:GATKReport." + version.versionString + " "))
                return version;
        }
        throw new ReviewedStingException("Unknown GATK report version in header: " + header);
    }
}

View File

@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
/** We use the SimpleTimer to time our run */
private SimpleTimer timer = new SimpleTimer("Traversal");
private SimpleTimer timer = null;
// How long can we go without printing some progress info?
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
private int printProgressCheckCounter = 0;
private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
}
}
/**
* Should be called to indicate that we're going to process records and the timer should start ticking
* Should be called to indicate that we're going to process records and the timer should start ticking. This
* function should be called right before any traversal work is done, to avoid counting setup costs in the
* processing costs and inflating the estimated runtime.
*/
public void startTimers() {
timer.start();
lastProgressPrintTime = timer.currentTime();
public void startTimersIfNecessary() {
if ( timer == null ) {
timer = new SimpleTimer("Traversal");
timer.start();
lastProgressPrintTime = timer.currentTime();
}
}
/**
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
* @return true if the maximum interval (in millisecs) has passed since the last printing
*/
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
return (curTime - lastPrintTime) > printFreq;
long elapsed = curTime - lastPrintTime;
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
}
/**

View File

@ -173,7 +173,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* -> those with the same mate pair position, for paired reads
* -> those flagged as unpaired and duplicated but having the same start and end
*/
boolean done = walker.isDone();
for (SAMRecord read : iter) {
if ( done ) break;
// get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
@ -194,6 +196,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
}
printProgress(dataProvider.getShard(),site);
done = walker.isDone();
}
return sum;

View File

@ -33,6 +33,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
LocusView locusView = getLocusView( walker, dataProvider );
boolean done = false;
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
@ -46,7 +47,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
// We keep processing while the next reference location is within the interval
while( locusView.hasNext() ) {
while( locusView.hasNext() && ! done ) {
AlignmentContext locus = locusView.next();
GenomeLoc location = locus.getLocation();
@ -65,26 +66,28 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
referenceView.expandBoundsToAccomodateLoc(location);
}
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
// create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
ReferenceContext refContext = referenceView.getReferenceContext(location);
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
final boolean keepMeP = walker.filter(tracker, refContext, locus);
if (keepMeP) {
M x = walker.map(tracker, refContext, locus);
sum = walker.reduce(x, sum);
done = walker.isDone();
}
printProgress(dataProvider.getShard(),locus.getLocation());
}
}
// We have a final map call to execute here to clean up the skipped based from the
// last position in the ROD to that in the interval
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) {
// We have a final map call to execute here to clean up the skipped based from the
// last position in the ROD to that in the interval
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
// only do this if the walker isn't done!
RodLocusView rodLocusView = (RodLocusView)locusView;
long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) {

View File

@ -50,7 +50,9 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
ReadView reads = new ReadView(dataProvider);
List<SAMRecord> pairs = new ArrayList<SAMRecord>();
boolean done = walker.isDone();
for(SAMRecord read: reads) {
if ( done ) break;
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
@ -65,6 +67,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
printProgress(dataProvider.getShard(),null);
}
done = walker.isDone();
}
// If any data was left in the queue, process it.

View File

@ -82,8 +82,10 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// get the reference ordered data
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
boolean done = walker.isDone();
// while we still have more reads
for (SAMRecord read : reads) {
if ( done ) break;
// ReferenceContext -- the reference bases covered by the read
ReferenceContext refContext = null;
@ -106,6 +108,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
printProgress(dataProvider.getShard(),locus);
done = walker.isDone();
}
return sum;
}

View File

@ -23,5 +23,4 @@ import java.lang.annotation.*;
@Target(ElementType.TYPE)
public @interface Allows {
DataSource[] value();
RMD[] referenceMetaData() default {};
}

View File

@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
@ -50,44 +52,158 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles.
* This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.
*
*
* <p>
* It allows the user to clip bases in reads with poor quality scores, that match particular
* sequences, or that were generated by particular machine cycles.
*
* <dl>
* <dt>Quality score based clipping</dt>
* <dd>
* Clip bases from the read in clipper from
* <br>argmax_x{ \sum{i = x + 1}^l (qTrimmingThreshold - qual)</br>
* to the end of the read. This is blatantly stolen from BWA.
*
* Walk through the read from the end (in machine cycle order) to the beginning, calculating the
* running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this
* sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the
* clipping index in the read (from the end).
* </dd>
* <dt>Cycle based clipping</dt>
* <dd>Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions).
* For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12.
* </dd>
* <dt>Sequence matching</dt>
* <dd>Clips bases from that exactly match one of a number of base sequences. This employs an exact match algorithm,
* filtering only bases whose sequence exactly matches SEQ.</dd>
* </dl>
*
* </p>
*
* <h2>Input</h2>
* <p>
* Any number of BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A new BAM file containing all of the reads from the input BAMs with the user-specified clipping
* operation applied to each read.
* </p>
* <p>
* <h3>Summary output</h3>
* <pre>
* Number of examined reads 13
* Number of clipped reads 13
* Percent of clipped reads 100.00
* Number of examined bases 988
* Number of clipped bases 126
* Percent of clipped bases 12.75
* Number of quality-score clipped bases 126
* Number of range clipped bases 0
* Number of sequence clipped bases 0
* </pre>
* </p>
*
* <p>
* <h3>Example clipping</h3>
* Suppose we are given this read:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* If we are clipping reads with -QT 10 and -CR WRITE_NS, we get:
*
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* Whereas with -CR WRITE_Q0S:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* Or -CR SOFTCLIP_BASES:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3133 29 17S59M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
* </p>
*
* <h2>Examples</h2>
* <pre>
* -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
* -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
* </pre>
* @author Mark DePristo
* @since 2010
*/
@Requires({DataSource.READS})
public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.ClippingData> {
@Output
PrintStream out;
public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
/**
* If provided, ClipReads will write summary statistics about the clipping operations applied
* to the reads to this file.
*/
@Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false)
PrintStream out = null;
/**
* an optional argument to dump the reads out to a BAM file
* The output SAM/BAM file will be written here
*/
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false)
StingSAMFileWriter outputBam = null;
@Output(doc = "Write BAM output here", required = true)
StingSAMFileWriter outputBam;
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false)
/**
* If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this
* quality score threshold.
*/
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false)
int qTrimmingThreshold = -1;
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false)
/**
* Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based
* values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11,
* and 12.
*/
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false)
String cyclesToClipArg = null;
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false)
/**
* Reads the sequences in the provided FASTA file, and clip any bases that exactly match any of the
* sequences in the file.
*/
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false)
String clipSequenceFile = null;
/**
* Clips bases from the reads matching the provided SEQ. Can be provided any number of times on the command line
*/
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
String[] clipSequencesArgs = null;
@Argument(fullName="read", doc="", required=false)
String onlyDoRead = null;
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false)
//boolean keepCompletelyClippedReads = false;
// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
// boolean onlyClipFirstSeqMatch = false;
/**
* The different values for this argument determines how ClipReads applies clips to the reads. This can range
* from writing Ns over the clipped bases to hard clipping away the bases from the BAM.
*/
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
@Hidden
@Advanced
@Argument(fullName="read", doc="", required=false)
String onlyDoRead = null;
/**
* List of sequence that should be clipped from the reads
@ -180,12 +296,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
* @param read the read itself, as a SAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipper map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read);
}
ReadClipper clipper = new ReadClipper(read);
ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
//
// run all three clipping modules
@ -205,9 +321,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
*
* @param clipper
*/
private void clipSequences(ReadClipper clipper) {
private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) {
// we have a pattern for both the forward and the reverse strands
@ -223,11 +340,14 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
if (found) {
int start = match.start();
int stop = match.end() - 1;
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op);
data.incSeqClippedBases(stc.seq, op.getLength());
}
}
}
clipper.setData(data);
}
}
@ -252,9 +372,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
*
* @param clipper
*/
private void clipCycles(ReadClipper clipper) {
private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) {
SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
int cycleStart = p.first;
@ -270,10 +391,13 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
int start = startStop.first;
int stop = startStop.second;
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
//ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op);
data.incNRangeClippedBases(op.getLength());
}
}
clipper.setData(data);
}
}
@ -291,8 +415,9 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
*
* @param clipper
*/
private void clipBadQualityScores(ReadClipper clipper) {
private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities();
@ -311,8 +436,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
if (clipPoint != -1) {
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
//clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op);
data.incNQClippedBases(op.getLength());
}
clipper.setData(data);
}
/**
@ -325,7 +454,7 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
return new ClippingData(sequencesToClip);
}
public ClippingData reduce(ReadClipper clipper, ClippingData data) {
public ClippingData reduce(ReadClipperWithData clipper, ClippingData data) {
if ( clipper == null )
return data;
@ -340,23 +469,8 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
data.nTotalBases += clipper.getRead().getReadLength();
if (clipper.wasClipped()) {
data.nClippedReads++;
for (ClippingOp op : clipper.getOps()) {
switch (op.type) {
case LOW_Q_SCORES:
data.incNQClippedBases(op.getLength());
break;
case WITHIN_CLIP_RANGE:
data.incNRangeClippedBases(op.getLength());
break;
case MATCHES_CLIP_SEQ:
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
break;
default:
throw new IllegalStateException("Unexpected Clipping operator type " + op);
}
}
data.addData(clipper.getData());
}
return data;
}
@ -417,6 +531,23 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
}
public void addData (ClippingData data) {
nTotalReads += data.nTotalReads;
nTotalBases += data.nTotalBases;
nClippedReads += data.nClippedReads;
nClippedBases += data.nClippedBases;
nQClippedBases += data.nQClippedBases;
nRangeClippedBases += data.nRangeClippedBases;
nSeqClippedBases += data.nSeqClippedBases;
for (String seqClip : data.seqClipCounts.keySet()) {
Long count = data.seqClipCounts.get(seqClip);
if (seqClipCounts.containsKey(seqClip))
count += seqClipCounts.get(seqClip);
seqClipCounts.put(seqClip, count);
}
}
public String toString() {
StringBuilder s = new StringBuilder();
@ -439,4 +570,27 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
return s.toString();
}
}
/**
 * A ReadClipper that additionally carries per-read clipping statistics
 * (a ClippingData) alongside the clipping operations themselves, so the
 * walker's reduce step can merge statistics from each clipped read.
 */
public class ReadClipperWithData extends ReadClipper {
    // Running statistics for the clipping applied to this read.
    private ClippingData data;

    /**
     * Wraps a read for clipping and seeds an empty statistics object.
     *
     * @param read     the read to be clipped
     * @param clipSeqs the clip sequences used to initialize the statistics
     */
    public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
        super(read);
        data = new ClippingData(clipSeqs);
    }

    /** Replaces the statistics object attached to this clipper. */
    public void setData(ClippingData data) {
        this.data = data;
    }

    /** @return the statistics accumulated for this read */
    public ClippingData getData() {
        return data;
    }

    /** Merges another read's statistics into this clipper's statistics. */
    public void addData(ClippingData data) {
        this.data.addData(data);
    }
}
}

View File

@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -17,7 +17,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS,DataSource.REFERENCE})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {

View File

@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {

View File

@ -25,15 +25,14 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -41,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
@ -68,6 +68,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
public boolean SHOW_INDEL_PILEUPS = false;
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
public List<RodBinding<Feature>> rods = Collections.emptyList();
public void initialize() {
}
@ -112,18 +115,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
*/
private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
ArrayList<String> rodStrings = new ArrayList<String>();
for ( GATKFeature datum : tracker.getAllRods() ) {
if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) {
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
}
for ( Feature datum : tracker.getValues(rods) ) {
rodStrings.add(datum.toString());
}
String rodString = Utils.join(", ", rodStrings);
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
if ( dbsnp != null)
rodString += DbSNPHelper.toMediumString(dbsnp);
if ( !rodString.equals("") )
rodString = "[ROD: " + rodString + "]";
@ -132,8 +128,6 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Override
public void onTraversalDone(Integer result) {
// Double check traversal result to make sure the count is the same.
// TODO: Is this check necessary?
out.println("[REDUCE RESULT] Traversal result is: " + result);
}
}

View File

@ -25,21 +25,24 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import java.io.PrintStream;
import java.util.Iterator;
/**
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD.
*/
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
public RodBinding<Feature> input;
@Output
PrintStream out;
@ -61,11 +64,8 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
Iterator<GATKFeature> rods = tracker.getAllRods().iterator();
while ( rods.hasNext() ) {
Object rod = rods.next().getUnderlyingObject();
if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) )
out.println(rod.toString());
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
out.println(feature.toString());
}
return 1;

View File

@ -40,26 +40,72 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting
* in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter
* command line argument.
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
*
* <p>
* PrintReads can dynamically merge the contents of multiple input BAM files, resulting
* in merged output sorted in coordinate order. Can also optionally filter reads based on the
* --read_filter command line argument.
*
* <h2>Input</h2>
* <p>
* One or more bam files.
* </p>
*
* <h2>Output</h2>
* <p>
* A single processed bam file.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
*
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
* </pre>
*
*/
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
String readGroup = null;
/**
* For example, --platform ILLUMINA or --platform 454.
*/
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
String platform = null; // E.g. ILLUMINA, 454
String platform = null;
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1;
/**
* Only reads from samples listed in the provided file(s) will be included in the output.
*/
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
public Set<File> sampleFile = new TreeSet<File>();
/**
* Only reads from the sample(s) will be included in the output.
*/
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>();

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.lang.annotation.*;

View File

@ -26,11 +26,14 @@
package org.broadinstitute.sting.gatk.walkers;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List;
@ -44,6 +47,10 @@ import java.util.List;
@ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(
groupName = "GATK walkers",
summary = "General tools available for running on the command line as part of the GATK package",
extraDocs = {CommandLineGATK.class})
public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit;
@ -119,6 +126,17 @@ public abstract class Walker<MapType, ReduceType> {
public void initialize() { }
/**
 * A hook for subclasses providing a mechanism to abort a traversal early.
 *
 * If this ever returns true, then the Traversal engine will stop executing map calls
 * and start the process of shutting down the walker in an orderly fashion.
 *
 * @return true if the walker wants the traversal to stop now; false (the default
 *         implementation) to let the traversal run to completion
 */
public boolean isDone() {
    return false;
}
/**
* Provide an initial value for reduce computations.
* @return Initial value of reduce.

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -42,9 +43,9 @@ import java.util.List;
import java.util.Map;
public class AlleleBalance implements InfoFieldAnnotation {
public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;
@ -89,7 +90,7 @@ public class AlleleBalance implements InfoFieldAnnotation {
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size();
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) {
continue;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
@ -15,9 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation {
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
Double ratio = annotateSNP(stratifiedContext, vc, g);
if (ratio == null)
return null;

View File

@ -8,7 +8,7 @@ import java.util.Map;
public abstract class AnnotationByDepth implements InfoFieldAnnotation {
public abstract class AnnotationByDepth extends InfoFieldAnnotation {
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {

View File

@ -34,6 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -46,9 +47,9 @@ import java.util.List;
import java.util.Map;
public class BaseCounts implements InfoFieldAnnotation {
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -43,14 +44,14 @@ import java.util.List;
import java.util.Map;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation {
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() )
return null;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -16,9 +17,9 @@ import java.util.List;
import java.util.Map;
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation {
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 )
return null;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
@ -22,13 +23,13 @@ import java.util.List;
import java.util.Map;
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation {
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() )
return null;

View File

@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
@ -42,11 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation {
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.isVariant() || vc.isFiltered() )
return null;

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils;
@ -16,9 +17,9 @@ import java.util.List;
import java.util.Map;
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation {
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
double content = computeGCContent(ref);
Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", content));

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils;
@ -23,11 +24,11 @@ import java.util.Map;
*/
// A set of annotations calculated directly from the GLs
public class GLstats implements InfoFieldAnnotation, StandardAnnotation {
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )

View File

@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
@ -48,13 +49,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation {
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.';
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
return null;

View File

@ -4,6 +4,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.utils.QualityUtils;
@ -18,13 +19,13 @@ import java.util.List;
import java.util.Map;
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation {
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES )

Some files were not shown because too many files have changed in this diff Show More