Merge remote-tracking branch 'unstable/master'

Eric Banks 2011-09-15 21:14:17 -04:00
commit 3cd9f3fe81
434 changed files with 19991 additions and 14388 deletions

build.xml

@ -43,16 +43,17 @@
<property name="scala.classes" value="${build.dir}/scala/classes" /> <property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" /> <property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
<property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" />
<!-- Contracts for Java --> <!-- Contracts for Java -->
<!-- To disable, run with -Duse.contracts=false --> <!-- By default, enabled only for test targets -->
<property name="use.contracts" value="true" /> <!-- To disable for test targets, run with -Duse.contracts=false -->
<!-- To enable for non-test targets, run with -Duse.contracts=true -->
<property name="java.contracts" value="${build.dir}/java/contracts" /> <property name="java.contracts" value="${build.dir}/java/contracts" />
<property name="contracts.version" value="1.0-20110609" /> <property name="contracts.version" value="1.0-20110609" />
<property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/> <property name="cofoja.jar" value="${lib.dir}/cofoja-${contracts.version}.jar"/>
<!-- where to find the tribble distro -->
<property name="tribble.dir" value="tribble" />
<!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' --> <!-- where to find 'findbugs', which you must set if you plan to use 'ant findbugs' -->
<property name="findbugs.home" value="./findbugs"/> <property name="findbugs.home" value="./findbugs"/>
@ -79,6 +80,16 @@
<patternset refid="java.source.pattern" /> <patternset refid="java.source.pattern" />
</fileset> </fileset>
<!-- terrible hack to get gatkdocs to see all files -->
<patternset id="all.java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" />
</patternset>
<fileset id="all.java.source.files" dir="${basedir}">
<patternset refid="all.java.source.pattern" />
</fileset>
<fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false"> <fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
<include name="**/*.java" /> <include name="**/*.java" />
</fileset> </fileset>
@ -160,6 +171,16 @@
<property name="scala.target" value="core"/> <property name="scala.target" value="core"/>
</target> </target>
<target name="init.buildpublic">
<!-- Set the properties needed to build public only -->
<property name="gatk.target" value="core"/>
<property name="scala.target" value="core"/>
</target>
<target name="init.usecontracts">
<property name="use.contracts" value="true" />
</target>
<target name="git.describe"> <target name="git.describe">
<exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false"> <exec executable="git" outputproperty="git.describe.output" resultproperty="git.describe.exit.value" failonerror="false">
<arg line="describe" /> <arg line="describe" />
@ -234,6 +255,7 @@
<!-- Create the build directory structure used by compile --> <!-- Create the build directory structure used by compile -->
<mkdir dir="${build.dir}"/> <mkdir dir="${build.dir}"/>
<mkdir dir="${lib.dir}"/>
<mkdir dir="${java.classes}"/> <mkdir dir="${java.classes}"/>
<mkdir dir="${java.contracts}"/> <mkdir dir="${java.contracts}"/>
@ -265,7 +287,7 @@
</taskdef> </taskdef>
</target> </target>
<target name="gatk.compile.public.source" depends="tribble,init,resolve"> <target name="gatk.compile.public.source" depends="init,resolve">
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}"> <javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
</javac> </javac>
@ -326,7 +348,7 @@
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private" <target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
description="create GATK contracts" if="include.contracts" /> description="create GATK contracts" if="include.contracts" />
<target name="gatk.compile" depends="tribble,init,resolve,gatk.compile.source,gatk.contracts" /> <target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
<target name="init.queue-extensions.generate" depends="gatk.compile"> <target name="init.queue-extensions.generate" depends="gatk.compile">
<condition property="uptodate.queue-extensions.generate"> <condition property="uptodate.queue-extensions.generate">
@ -457,6 +479,35 @@
</javadoc> </javadoc>
</target> </target>
<target name="clean.gatkdocs">
<delete dir="gatkdocs"/>
</target>
<target name="gatkdocs" depends="gatk.compile"
description="Extract help key/value pair file from the JavaDoc tags.">
<path id="doclet.classpath">
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
</path>
<!-- Run with -Dgatkdocs.include.hidden=true to include documentation for hidden features -->
<condition property="gatkdocs.include.hidden.arg" value="-include-hidden" else="">
<isset property="gatkdocs.include.hidden" />
</condition>
<javadoc doclet="org.broadinstitute.sting.utils.help.GATKDoclet"
docletpathref="doclet.classpath"
classpathref="external.dependencies"
classpath="${java.classes}"
additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet -J-Xdebug -J-Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=5005"> <!-- -test to only do DocumentationTest walker -->
<sourcefiles>
<union>
<fileset refid="all.java.source.files"/>
</union>
</sourcefiles>
</javadoc>
</target>
<target name="sting.compile" depends="gatk.compile, scala.compile" /> <target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="init.jar" depends="sting.compile,extracthelp"> <target name="init.jar" depends="sting.compile,extracthelp">
@ -490,6 +541,7 @@
<include name="**/utils/codecs/**/*.class"/> <include name="**/utils/codecs/**/*.class"/>
<include name="**/utils/variantcontext/**/*.class"/> <include name="**/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/> <include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
</fileset> </fileset>
</jar> </jar>
</target> </target>
@ -657,54 +709,6 @@
</antcall> </antcall>
</target> </target>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
<echo message="Sting: Compiling test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
<echo message="Scala: Compiling test cases!"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<src path="${scala.private.test.sources}" />
<include name="**/*.scala"/>
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.test.classes}"/>
<pathelement location="${java.test.classes}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
</scalac>
</target>
<target name="test.compile" depends="test.java.compile,test.scala.compile">
</target>
<!-- new scala target --> <!-- new scala target -->
<target name="scala" description="build the scala directory"> <target name="scala" description="build the scala directory">
@ -718,20 +722,113 @@
<!-- ***************************************************************************** --> <!-- ***************************************************************************** -->
<!-- where to put reports and tests--> <!-- where to put reports and tests-->
<property name="report" value="${build.dir}/report"/> <property name="report" value="${build.dir}/report"/>
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="test.output" value="${dist.dir}/test"/> <property name="test.output" value="${dist.dir}/test"/>
<property name="java.public.test.sources" value="public/java/test"/> <property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="java.private.test.sources" value="private/java/test"/> <property name="java.public.test.classes" value="${java.test.classes}/public"/>
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/> <property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
<property name="scala.public.test.sources" value="public/scala/test"/> <property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
<property name="scala.private.test.sources" value="private/scala/test"/> <property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
<!-- provide a ceiling on the memory that unit/integration tests can consume. --> <!-- provide a ceiling on the memory that unit/integration tests can consume. -->
<property name="test.maxmemory" value="4g"/> <property name="test.maxmemory" value="4g"/>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.public.compile" depends="dist,test.init.compile">
<mkdir dir="${java.public.test.classes}"/>
<echo message="Sting: Compiling public test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
<mkdir dir="${java.private.test.classes}"/>
<echo message="Sting: Compiling private test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
<mkdir dir="${scala.public.test.classes}"/>
<echo message="Scala: Compiling public test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
<mkdir dir="${scala.private.test.classes}"/>
<echo message="Scala: Compiling private test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.private.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.public.test.classes}"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.private.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
<!-- TEST --> <!-- TEST -->
<macrodef name="run-test"> <macrodef name="run-test">
<attribute name="testtype"/> <attribute name="testtype"/>
<attribute name="outputdir"/>
<attribute name="runfailed"/>
<sequential> <sequential>
<condition property="run.failed.tests">
<equals arg1="@{runfailed}" arg2="true"/>
</condition>
<!-- Get the pipeline run type. Default to dry. --> <!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}"> <condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" /> <equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
@ -741,10 +838,10 @@
<isset property="include.contracts" /> <isset property="include.contracts" />
</condition> </condition>
<mkdir dir="${report}/@{testtype}"/> <mkdir dir="@{outputdir}"/>
<echo message="Sting: Running @{testtype} test cases!"/> <echo message="Sting: Running @{testtype} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/> <taskdef resource="testngtasks" classpath="${testng.jar}"/>
<testng outputDir="${report}/@{testtype}" <testng outputDir="@{outputdir}"
haltOnFailure="false" failureProperty="test.failure" haltOnFailure="false" failureProperty="test.failure"
verbose="2" verbose="2"
workingDir="${basedir}" workingDir="${basedir}"
@ -762,199 +859,163 @@
<pathelement location="${java.classes}" /> <pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" /> <pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" /> <pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" /> <pathelement location="${java.public.test.classes}" />
<pathelement location="${scala.test.classes}" /> <pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" />
</classpath> </classpath>
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/> <classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" /> <classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}.class" if="include.private"/>
</classfileset>
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="scala.include"/>
</classfileset>
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="include.scala.private"/>
</classfileset>
<xmlfileset dir="${basedir}">
<include name="@{testtype}" if="run.failed.tests"/>
</xmlfileset>
</testng> </testng>
<!-- generate a report for Bamboo or Hudson to read in --> <!-- generate a report for Bamboo or Hudson to read in -->
<junitreport todir="${report}/@{testtype}"> <junitreport todir="@{outputdir}">
<fileset dir="${report}/@{testtype}"> <fileset dir="@{outputdir}">
<include name="*/*.xml"/> <include name="*/*.xml"/>
</fileset> </fileset>
<report format="noframes" todir="${report}/@{testtype}"/> <report format="noframes" todir="@{outputdir}"/>
</junitreport> </junitreport>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<!-- FAILED-TEST -->
<macrodef name="run-failed-test">
<attribute name="xmlfailedtestfile" />
<sequential>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
</condition>
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
<isset property="include.contracts" />
</condition>
<mkdir dir="${report}/failed_rerun" />
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
<testng outputDir="${report}/failed_rerun"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
useDefaultListeners="false"
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
<jvmarg value="-Xmx${test.maxmemory}" />
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
</classpath>
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
</testng>
<fail message="test failed" if="test.failure" /> <fail message="test failed" if="test.failure" />
</sequential> </sequential>
</macrodef> </macrodef>
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest --> <target name="alltests">
<target name="test" depends="test.compile,tribble.test" description="Run unit tests"> <antcall target="test" inheritAll="false"/>
<antcall target="integrationtest" inheritAll="false"/>
<antcall target="pipelinetest" inheritAll="false"/>
</target>
<target name="alltests.public">
<antcall target="test.public" inheritAll="false"/>
<antcall target="integrationtest.public" inheritAll="false"/>
<antcall target="pipelinetest.public" inheritAll="false"/>
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}"> <condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not> <not><isset property="single"/></not>
</condition> </condition>
<run-test testtype="${ttype}"/> <run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
</target> </target>
<target name="integrationtest" depends="test.compile" description="Run integration tests"> <target name="test.public" depends="init.buildpublic,test"/>
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
<condition property="itype" value="*IntegrationTest" else="${single}"> <condition property="itype" value="*IntegrationTest" else="${single}">
<not><isset property="single"/></not> <not><isset property="single"/></not>
</condition> </condition>
<run-test testtype="${itype}"/> <run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
</target> </target>
<target name="performancetest" depends="test.compile" description="Run performance tests"> <target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
<condition property="ptype" value="*PerformanceTest" else="${single}"> <condition property="ptype" value="*PerformanceTest" else="${single}">
<not><isset property="single"/></not> <not><isset property="single"/></not>
</condition> </condition>
<run-test testtype="${ptype}"/> <run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
</target> </target>
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests"> <target name="performancetest.public" depends="init.buildpublic,performancetest" />
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
<condition property="pipetype" value="*PipelineTest" else="${single}"> <condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not> <not><isset property="single"/></not>
</condition> </condition>
<run-test testtype="${pipetype}"/> <run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target> </target>
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests"> <target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
<property name="pipeline.run" value="run"/> <property name="pipeline.run" value="run"/>
<condition property="pipetype" value="*PipelineTest" else="${single}"> <condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not> <not><isset property="single"/></not>
</condition> </condition>
<run-test testtype="${pipetype}"/> <run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
<target name="failed-test" depends="init.buildall,test.compile">
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target> </target>
<target name="failed-test" depends="test.compile"> <target name="failed-integration" depends="init.buildall,test.compile">
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" /> <run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target> </target>
<target name="failed-integration" depends="test.compile"> <target name="failed-performance" depends="init.buildall,test.compile">
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" /> <run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target> </target>
<target name="failed-performance" depends="test.compile"> <target name="failed-pipeline" depends="init.buildall,test.compile">
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" /> <run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target> </target>
<target name="failed-pipeline" depends="test.compile"> <!-- ******************************************************************************** -->
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" /> <!-- Javadoc -->
</target> <!-- ******************************************************************************** -->
<!-- ***************************************************************************** -->
<!-- *********** Tribble ********* -->
<!-- ***************************************************************************** -->
<target name="tribble.init" description="checks if tribble is available to build from source">
<condition property="tribble.compile.exists">
<available file="${tribble.dir}/build.xml"/>
</condition>
</target>
<!-- compile the library -->
<target name="tribble.compile" description="compiles the tribble library" depends="tribble.init" if="tribble.compile.exists">
<echo message="Building the Tribble Library..."/>
<ant antfile="build.xml" target="all" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- copy the compiled library -->
<target name="tribble.compile.copy" description="Copies the compiled tribble library" depends="tribble.compile" if="tribble.compile.exists">
<copy todir="${lib.dir}">
<fileset dir="${tribble.dir}/dist" includes="*.jar"/>
</copy>
</target>
<!-- copy the precompiled library -->
<target name="tribble.library.copy" description="Copies the precompiled tribble library" depends="tribble.init" unless="tribble.compile.exists">
<echo message="Copying the Tribble Library..."/>
<copy todir="${lib.dir}">
<fileset dir="settings/repository/org.broad" includes="tribble*.jar"/>
</copy>
</target>
<target name="tribble" description="Copies the tribble jar" depends="tribble.compile.copy,tribble.library.copy"/>
<target name="tribble.test.init" description="runs the tribble tests" depends="tribble.init">
<condition property="tribble.test.run">
<and>
<isset property="tribble.compile.exists"/>
<not><isset property="single"/></not>
</and>
</condition>
</target>
<!-- test tribble using the unit tests set in tribble -->
<target name="tribble.test" description="runs the tribble tests" depends="tribble.test.init,tribble.compile" if="tribble.test.run">
<echo message="Testing the Tribble Library..."/>
<ant antfile="build.xml" target="test" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- clean tribble -->
<target name="tribble.clean" description="cleans the tribble library" depends="tribble.init" if="tribble.compile.exists">
<echo message="Cleaning the Tribble Library..."/>
<ant antfile="build.xml" target="clean" dir="${tribble.dir}" inheritAll="false"/>
</target>
<!-- ***************************************************************************** -->
<target name="clean.javadoc"> <target name="clean.javadoc">
<delete dir="javadoc"/> <delete dir="${javadoc.dir}" />
<delete dir="scaladoc"/>
</target> </target>
<target name="javadoc" depends="init.buildall,resolve,queue-extensions.generate,init.scala.compile" description="generates javadoc"> <target name="init.javadoc">
<mkdir dir="javadoc"/> <mkdir dir="${javadoc.dir}" />
<javadoc destdir="javadoc" </target>
classpathref="external.dependencies">
<sourcepath path="${java.public.source.dir}"/> <target name="javadoc" depends="init.buildpublic,generate.javadoc" description="Generates public javadoc" />
<sourcepath path="${external.dir}"/>
<target name="javadoc.private" depends="init.buildall,generate.javadoc" description="Generates public and private javadoc" />
<target name="generate.javadoc" depends="init.javadoc,resolve">
<javadoc destdir="${javadoc.dir}" classpathref="external.dependencies">
<fileset refid="java.source.files" />
<sourcepath path="${external.dir}" />
</javadoc> </javadoc>
<javadoc destdir="javadoc" </target>
classpathref="external.dependencies">
<sourcepath path="${java.private.source.dir}"/> <!-- ******************************************************************************** -->
<exclude name="**" unless="include.private" /> <!-- Scaladoc -->
</javadoc> <!-- ******************************************************************************** -->
<mkdir dir="scaladoc"/>
<scaladoc srcdir="" destdir="scaladoc" classpathref="scala.dependencies" deprecation="yes" unchecked="yes"> <target name="clean.scaladoc">
<src path="${scala.public.source.dir}"/> <delete dir="${scaladoc.dir}" />
<src path="${scala.private.source.dir}"/> </target>
<src path="${queue-extensions.source.dir}"/>
<include name="**/*.scala"/> <target name="init.scaladoc">
<mkdir dir="${scaladoc.dir}" />
</target>
<!-- NOTE: the scaladoc targets require that the environment variable ANT_OPTS has been set to "-Xmx1G" -->
<target name="scaladoc" depends="init.buildpublic,generate.scaladoc" description="Generates public scaladoc -- set ANT_OPTS to -Xmx1G" />
<target name="scaladoc.private" depends="init.buildall,generate.scaladoc" description="Generates public and private scaladoc -- set ANT_OPTS to -Xmx1G" />
<target name="generate.scaladoc" depends="resolve,queue-extensions.generate,init.scala.compile,scala.compile,init.scaladoc">
<scaladoc srcdir="${basedir}" destdir="${scaladoc.dir}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<include name="${scala.public.source.dir}/**/*.scala" />
<include name="${queue-extensions.source.dir}/**/*.scala" />
<include name="${scala.private.source.dir}/**/*.scala" if="include.private" />
</scaladoc> </scaladoc>
</target> </target>
<!-- ******************************************************************************** -->
<!-- Release-related tasks -->
<!-- ******************************************************************************** -->
<!-- Unzip all classes from their current locations and assemble them in a staging directory --> <!-- Unzip all classes from their current locations and assemble them in a staging directory -->
<target name="stage" description="stage files for distribution"> <target name="stage" description="stage files for distribution">
<mkdir dir="staging"/> <mkdir dir="staging"/>
@ -1044,7 +1105,7 @@
</findbugs> </findbugs>
</target> </target>
<target name="clean" description="clean up" depends="tribble.clean,clean.javadoc"> <target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<delete dir="out"/> <delete dir="out"/>
<delete dir="${build.dir}"/> <delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/> <delete dir="${lib.dir}"/>


@ -12,6 +12,9 @@
<dependency org="net.sf" name="picard" rev="latest.integration"/> <dependency org="net.sf" name="picard" rev="latest.integration"/>
<dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/> <dependency org="edu.mit.broad" name="picard-private-parts" rev="latest.integration"/>
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<dependency org="log4j" name="log4j" rev="1.2.15"> <dependency org="log4j" name="log4j" rev="1.2.15">
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email --> <!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
<exclude org="javax.mail" /> <exclude org="javax.mail" />
@ -30,6 +33,9 @@
<!-- Dependencies for the graph aligner --> <!-- Dependencies for the graph aligner -->
<dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/> <dependency org="org.jgrapht" name="jgrapht-jdk1.5" rev="0.7.3"/>
<!-- Dependencies for the html walker documentation -->
<dependency org="org.freemarker" name="freemarker" rev="2.3.18"/>
<!-- Commons Dependencies --> <!-- Commons Dependencies -->
<dependency org="org.apache.commons" name="commons-email" rev="1.2"/> <dependency org="org.apache.commons" name="commons-email" rev="1.2"/>


@ -0,0 +1,169 @@
library(gsalib)
require("ggplot2")
require("gplots")
#
# Standard command line switch. Can be loaded interactively for development
# or executed with RScript
#
args = commandArgs(TRUE)
onCMDLine = ! is.na(args[1])
if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA
}
RUNTIME_UNITS = "(sec)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000
#
# Helper function to aggregate all of the jobs in the report across all tables
#
allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub)
}
#
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
#
plotJobsGantt <- function(gatkReport, sortOverall) {
allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) {
title = "All jobs, by analysis, by start time"
allJobs = allJobs[order(allJobs$analysisName, allJobs$startTime, decreasing=T), ]
} else {
title = "All jobs, sorted by start time"
allJobs = allJobs[order(allJobs$startTime, decreasing=T), ]
}
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
p <- p + ylab("Job")
p <- p + opts(title=title)
print(p)
}
#
# Plots scheduling efficiency at job events
#
plotProgressByTime <- function(gatkReport) {
allJobs = allJobsFromReport(gatkReport)
nJobs = dim(allJobs)[1]
allJobs = allJobs[order(allJobs$startTime, decreasing=F),]
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
times = sort(c(allJobs$relStartTime, allJobs$relDoneTime))
countJobs <- function(p) {
s = allJobs$relStartTime
e = allJobs$relDoneTime
x = c() # I wish I knew how to make this work with apply
for ( time in times )
x = c(x, sum(p(s, e, time)))
x
}
pending = countJobs(function(s, e, t) s > t)
done = countJobs(function(s, e, t) e < t)
running = nJobs - pending - done
d = data.frame(times=times, pending=pending, running=running, done=done)
p <- ggplot(data=melt(d, id.vars=c("times")), aes(x=times, y=value, color=variable))
p <- p + facet_grid(variable ~ ., scales="free")
p <- p + geom_line(size=2)
p <- p + xlab(paste("Time since start of first job", RUNTIME_UNITS))
p <- p + opts(title = "Job scheduling")
print(p)
}
#
# Creates tables for each job in this group
#
standardColumns = c("jobName", "startTime", "formattedStartTime", "analysisName", "intermediate", "exechosts", "formattedDoneTime", "doneTime", "runtime")
plotGroup <- function(groupTable) {
name = unique(groupTable$analysisName)[1]
groupAnnotations = setdiff(names(groupTable), standardColumns)
sub = groupTable[,c("jobName", groupAnnotations, "runtime")]
sub = sub[order(sub$iteration, sub$jobName, decreasing=F), ]
# create a table showing each job and all annotations
textplot(sub, show.rownames=F)
title(paste("Job summary for", name, "full itemization"), cex=3)
# create the table for each combination of values in the group, listing iterations in the columns
sum = cast(melt(sub, id.vars=groupAnnotations, measure.vars=c("runtime")), ... ~ iteration, fun.aggregate=mean)
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "itemizing each iteration"), cex=3)
# histogram of job times by groupAnnotations
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name))
print(p)
}
# as above, but averaging over all iterations
groupAnnotationsNoIteration = setdiff(groupAnnotations, "iteration")
if ( dim(sub)[1] > 1 ) {
sum = cast(melt(sub, id.vars=groupAnnotationsNoIteration, measure.vars=c("runtime")), ... ~ ., fun.aggregate=c(mean, sd))
textplot(as.data.frame(sum), show.rownames=F)
title(paste("Job summary for", name, "averaging over all iterations"), cex=3)
}
}
# print out some useful basic information
print("Report")
print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) {
convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
g
}
lapply(gatkReportData, convertGroup)
}
# read the table
gatkReportData <- gsa.read.gatkreport(inputFileName)
gatkReportData <- convertUnits(gatkReportData)
#print(summary(gatkReportData))
if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11)
}
plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F)
plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) {
plotGroup(group)
}
if ( ! is.na(outputPDF) ) {
dev.off()
}


@ -20,6 +20,20 @@
assign(tableName, d, envir=tableEnv); assign(tableName, d, envir=tableEnv);
} }
# Split a fixed-width line of text into a vector of trimmed fields.
.gsa.splitFixedWidth <- function(line, columnStarts) {
splitStartStop <- function(x) {
x = substring(x, starts, stops);
x = gsub("^[[:space:]]+|[[:space:]]+$", "", x);
x;
}
starts = c(1, columnStarts);
stops = c(columnStarts - 1, nchar(line));
sapply(line, splitStartStop)[,1];
}
# Load all GATKReport tables from a file # Load all GATKReport tables from a file
gsa.read.gatkreport <- function(filename) { gsa.read.gatkreport <- function(filename) {
con = file(filename, "r", blocking = TRUE); con = file(filename, "r", blocking = TRUE);
@ -31,9 +45,10 @@ gsa.read.gatkreport <- function(filename) {
tableName = NA; tableName = NA;
tableHeader = c(); tableHeader = c();
tableRows = c(); tableRows = c();
version = NA;
for (line in lines) { for (line in lines) {
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) { if (length(grep("^##:GATKReport.v", line, ignore.case=TRUE)) > 0) {
headerFields = unlist(strsplit(line, "[[:space:]]+")); headerFields = unlist(strsplit(line, "[[:space:]]+"));
if (!is.na(tableName)) { if (!is.na(tableName)) {
@ -43,13 +58,37 @@ gsa.read.gatkreport <- function(filename) {
tableName = headerFields[2]; tableName = headerFields[2];
tableHeader = c(); tableHeader = c();
tableRows = c(); tableRows = c();
# For differences in versions see
# $STING_HOME/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
if (length(grep("^##:GATKReport.v0.1[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.1";
} else if (length(grep("^##:GATKReport.v0.2[[:space:]]+", line, ignore.case=TRUE)) > 0) {
version = "v0.2";
columnStarts = c();
}
} else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) { } else if (length(grep("^[[:space:]]*$", line)) > 0 | length(grep("^[[:space:]]*#", line)) > 0) {
# do nothing # do nothing
} else if (!is.na(tableName)) { } else if (!is.na(tableName)) {
row = unlist(strsplit(line, "[[:space:]]+"));
if (version == "v0.1") {
row = unlist(strsplit(line, "[[:space:]]+"));
} else if (version == "v0.2") {
if (length(tableHeader) == 0) {
headerChars = unlist(strsplit(line, ""));
# Find each position where a non-space character follows a space; these become the column starts
columnStarts = intersect(grep("[[:space:]]", headerChars, invert=TRUE), grep("[[:space:]]", headerChars) + 1);
}
row = .gsa.splitFixedWidth(line, columnStarts);
}
if (length(tableHeader) == 0) { if (length(tableHeader) == 0) {
tableHeader = row; tableHeader = row;
} else { } else {
tableRows = rbind(tableRows, row); tableRows = rbind(tableRows, row);
} }
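For readers following the v0.2 fixed-width parsing above, here is a minimal sketch of the same column-start detection and splitting idea, rewritten in Java purely for illustration; the class name and the header/row values are hypothetical and this is not code from the commit.

// Illustrative Java re-implementation of the fixed-width splitting used by
// gsa.read.gatkreport for GATKReport v0.2 tables. Names and data are made up.
import java.util.ArrayList;
import java.util.List;

public class FixedWidthSplitSketch {
    // Column starts = positions where a non-space character follows a space (0-based).
    static List<Integer> columnStarts(String header) {
        List<Integer> starts = new ArrayList<>();
        for (int i = 1; i < header.length(); i++) {
            if (!Character.isWhitespace(header.charAt(i)) && Character.isWhitespace(header.charAt(i - 1)))
                starts.add(i);
        }
        return starts;
    }

    // Split a line at the given column starts and trim each field.
    static List<String> splitFixedWidth(String line, List<Integer> starts) {
        List<String> fields = new ArrayList<>();
        int begin = 0;
        for (int s : starts) {
            fields.add(line.substring(begin, Math.min(s, line.length())).trim());
            begin = s;
        }
        fields.add(line.substring(Math.min(begin, line.length())).trim());
        return fields;
    }

    public static void main(String[] args) {
        String header = "CountVariants  dbsnp  eval      novelty  nCalled";   // hypothetical header row
        String row    = "CountVariants  dbsnp  eval.vcf  all      1234567";   // hypothetical data row
        // Prints [CountVariants, dbsnp, eval.vcf, all, 1234567], mirroring .gsa.splitFixedWidth for one row.
        System.out.println(splitFixedWidth(row, columnStarts(header)));
    }
}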


@ -25,7 +25,6 @@
package net.sf.picard.reference; package net.sf.picard.reference;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSourceProgressListener;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*; import static net.sf.picard.reference.FastaSequenceIndexBuilder.Status.*;
@ -39,8 +38,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
* Produces fai file with same output as samtools faidx * Produces fai file with same output as samtools faidx
*/ */
public class FastaSequenceIndexBuilder { public class FastaSequenceIndexBuilder {
public File fastaFile; final public File fastaFile;
ReferenceDataSourceProgressListener progress; // interface that provides a method for updating user on progress of reading file final boolean printProgress;
// keep track of location in file // keep track of location in file
long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file; long bytesRead, endOfLastLine, lastTimestamp, fileLength; // initialized to -1 to keep 0-indexed position in file;
@ -55,10 +54,10 @@ public class FastaSequenceIndexBuilder {
public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT } public enum Status { NONE, CONTIG, FIRST_SEQ_LINE, SEQ_LINE, COMMENT }
Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum? Status status = Status.NONE; // keeps state of what is currently being read. better to use int instead of enum?
public FastaSequenceIndexBuilder(File fastaFile, ReferenceDataSourceProgressListener progress) { public FastaSequenceIndexBuilder(File fastaFile, boolean printProgress) {
this.progress = progress;
this.fastaFile = fastaFile; this.fastaFile = fastaFile;
fileLength = fastaFile.length(); fileLength = fastaFile.length();
this.printProgress = printProgress;
} }
/** /**
@ -252,8 +251,8 @@ public class FastaSequenceIndexBuilder {
if (System.currentTimeMillis() - lastTimestamp > 10000) { if (System.currentTimeMillis() - lastTimestamp > 10000) {
int percentProgress = (int) (100*bytesRead/fileLength); int percentProgress = (int) (100*bytesRead/fileLength);
if (progress != null) if (printProgress)
progress.percentProgress(percentProgress); System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percentProgress));
lastTimestamp = System.currentTimeMillis(); lastTimestamp = System.currentTimeMillis();
} }
} }


@ -31,30 +31,85 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate; import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*; import java.io.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
* Created by IntelliJ IDEA. * Call R scripts to plot residual error versus the various covariates.
* User: rpoplin *
* Date: Dec 1, 2009 * <p>
* After counting covariates in either the initial BAM File or again in the recalibrated BAM File, an analysis tool is available which
* reads the .csv file and outputs several PDF (and .dat) files for each read group in the given BAM. These PDF files graphically
* show the various metrics and characteristics of the reported quality scores (often in relation to the empirical qualities).
* In order to show that any biases in the reported quality scores have been generally fixed through recalibration, one should run
* CountCovariates again on a bam file produced by TableRecalibration. In this way users can compare the analysis plots generated
* from the pre-recalibration and post-recalibration .csv files. Our usual chain of commands for generating plots of residual
* error is: CountCovariates, TableRecalibrate, samtools index on the recalibrated bam file, CountCovariates again on the recalibrated
* bam file, and then AnalyzeCovariates on both the before and after recal_data.csv files to see the improvement in recalibration.
*
* <p>
* The color coding along with the RMSE is included in the plots to give some indication of the number of observations that went into
* each of the quality score estimates. It is defined as follows for N, the number of observations:
*
* <ul>
* <li>light blue means N < 1,000</li>
* <li>cornflower blue means 1,000 <= N < 10,000</li>
* <li>dark blue means N >= 10,000</li>
* <li>The pink dots indicate points whose quality scores are special codes used by the aligner and which are mathematically
* meaningless and so aren't included in any of the numerical calculations.</li>
* </ul>
*
* <p>
* NOTE: For those running this tool outside the Broad, it is crucial that both the -Rscript and -resources options
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
*
* <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
*
* <h2>Input</h2>
* <p>
* The recalibration table file in CSV format that was generated by the CountCovariates walker.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \
* -resources resources/ \
* -ignoreQ 5
* </pre>
* *
* Create collapsed versions of the recal csv file and call R scripts to plot residual error versus the various covariates.
*/ */
@DocumentedGATKFeature(
groupName = "AnalyzeCovariates",
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram { public class AnalyzeCovariates extends CommandLineProgram {
///////////////////////////// /////////////////////////////
// Command Line Arguments // Command Line Arguments
///////////////////////////// /////////////////////////////
/**
* After the header, data records occur one per line until the end of the file. The first several items on a line are the
* values of the individual covariates and will change depending on which covariates were specified at runtime. The last
* three items are the data, that is, the number of observations for this combination of covariates, the number of reference mismatches,
* and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*/
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv"; private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
@ -65,13 +120,22 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String PATH_TO_RESOURCES = "public/R/"; private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5; private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups private int NUM_READ_GROUPS_TO_PROCESS = -1; // -1 means process all read groups
/**
* Combinations of covariates in which there are zero mismatches technically have infinite quality. We get around this situation
* by capping at the specified value. We've found that Q40 is too low when using a more complete database of known variation, like dbSNP build 132 or later.
*/
@Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50") @Argument(fullName="max_quality_score", shortName="maxQ", required = false, doc="The integer value at which to cap the quality scores, default is 50")
private int MAX_QUALITY_SCORE = 50; private int MAX_QUALITY_SCORE = 50;
/**
* This argument is useful when comparing before/after plots where you want the axes to match each other.
*/
@Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots") @Argument(fullName="max_histogram_value", shortName="maxHist", required = false, doc="If supplied, this value will be the max value of the histogram plots")
private int MAX_HISTOGRAM_VALUE = 0; private int MAX_HISTOGRAM_VALUE = 0;
@Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, this value will be the max value of the histogram plots") @Argument(fullName="do_indel_quality", shortName="indels", required = false, doc="If supplied, do indel quality plotting")
private boolean DO_INDEL_QUALITY = false; private boolean DO_INDEL_QUALITY = false;
@ -261,13 +325,14 @@ public class AnalyzeCovariates extends CommandLineProgram {
} }
private void callRScripts() { private void callRScripts() {
RScriptExecutor.RScriptArgumentCollection argumentCollection =
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0; int numReadGroups = 0;
// for each read group // for each read group
for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) { for( Object readGroupKey : dataManager.getCollapsedTable(0).data.keySet() ) {
Process p;
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString(); String readGroup = readGroupKey.toString();
@ -276,35 +341,19 @@ public class AnalyzeCovariates extends CommandLineProgram {
// for each covariate // for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii); Covariate cov = requestedCovariates.get(iii);
try { final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
if (DO_INDEL_QUALITY) {
if (DO_INDEL_QUALITY) { executor.callRScripts("plot_indelQuality.R", outputFilename,
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_indelQuality.R" + " " + cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + } else {
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
p.waitFor();
} else {
if( iii == 1 ) { if( iii == 1 ) {
// Analyze reported quality // Analyze reported quality
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_QualityScoreCovariate.R" + " " + executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " + IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
IGNORE_QSCORES_LESS_THAN + " " + MAX_QUALITY_SCORE + " " + MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored } else { // Analyze all other covariates
p.waitFor(); executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
} else { // Analyze all other covariates cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
p = Runtime.getRuntime().exec(PATH_TO_RSCRIPT + " " + PATH_TO_RESOURCES + "plot_residualError_OtherCovariate.R" + " " +
OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat" + " " +
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
p.waitFor();
}
} }
} catch (InterruptedException e) {
e.printStackTrace();
System.exit(-1);
} catch (IOException e) {
System.out.println("Fatal Exception: Perhaps RScript jobs are being spawned too quickly? One work around is to process fewer read groups using the -numRG option.");
e.printStackTrace();
System.exit(-1);
} }
} }
} else { // at the maximum number of read groups so break out } else { // at the maximum number of read groups so break out


@ -0,0 +1,4 @@
/**
* Package to plot residual accuracy versus error covariates for the base quality score recalibrator.
*/
package org.broadinstitute.sting.analyzecovariates;


@ -0,0 +1,40 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.lang.annotation.*;
/**
* Indicates that a walker argument is considered an advanced option.
*
* @author Mark DePristo
* @version 0.1
*/
@Documented
@Inherited
@Retention(RetentionPolicy.RUNTIME)
@Target({ElementType.TYPE,ElementType.FIELD})
public @interface Advanced {
}

View File

@ -174,7 +174,8 @@ public class ArgumentDefinitions implements Iterable<ArgumentDefinition> {
static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() { static DefinitionMatcher VerifiableDefinitionMatcher = new DefinitionMatcher() {
public boolean matches( ArgumentDefinition definition, Object key ) { public boolean matches( ArgumentDefinition definition, Object key ) {
return definition.validation != null; // We can perform some sort of validation for anything that isn't a flag.
return !definition.isFlag;
} }
}; };
} }

View File

@ -44,7 +44,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
public final String label; public final String label;
/** /**
* Maps indicies of command line arguments to values paired with that argument. * Maps indices of command line arguments to values paired with that argument.
*/ */
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>(); public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();

View File

@ -151,6 +151,14 @@ public class ArgumentSource {
return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class); return field.isAnnotationPresent(Hidden.class) || field.isAnnotationPresent(Deprecated.class);
} }
/**
* Is the given argument considered an advanced option when displayed by the command-line argument system?
* @return True if so. False otherwise.
*/
public boolean isAdvanced() {
return field.isAnnotationPresent(Advanced.class);
}
/** /**
* Is this command-line argument dependent on some primitive argument types? * Is this command-line argument dependent on some primitive argument types?
* @return True if this command-line argument depends on other arguments; false otherwise. * @return True if this command-line argument depends on other arguments; false otherwise.
@ -175,13 +183,17 @@ public class ArgumentSource {
return typeDescriptor.createsTypeDefault(this); return typeDescriptor.createsTypeDefault(this);
} }
public String typeDefaultDocString() {
return typeDescriptor.typeDefaultDocString(this);
}
/** /**
* Generates a default for the given type. * Generates a default for the given type.
* @param parsingEngine the parsing engine used to validate this argument type descriptor. * @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @return A default value for the given type. * @return A default value for the given type.
*/ */
public Object createTypeDefault(ParsingEngine parsingEngine) { public Object createTypeDefault(ParsingEngine parsingEngine) {
return typeDescriptor.createTypeDefault(parsingEngine,this,field.getType()); return typeDescriptor.createTypeDefault(parsingEngine,this,field.getGenericType());
} }
/** /**

View File

@ -26,6 +26,8 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Multiplex; import org.broadinstitute.sting.gatk.walkers.Multiplex;
import org.broadinstitute.sting.gatk.walkers.Multiplexer; import org.broadinstitute.sting.gatk.walkers.Multiplexer;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -33,6 +35,7 @@ import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.lang.annotation.Annotation; import java.lang.annotation.Annotation;
import java.lang.reflect.*; import java.lang.reflect.*;
import java.util.*; import java.util.*;
@ -80,14 +83,26 @@ public abstract class ArgumentTypeDescriptor {
*/ */
public boolean createsTypeDefault(ArgumentSource source) { return false; } public boolean createsTypeDefault(ArgumentSource source) { return false; }
/**
* Returns a documentation-friendly value for the default of a type descriptor.
* Must be overridden if createsTypeDefault returns true; cannot be called otherwise.
* @param source Source of the command-line argument.
* @return Friendly string of the default value, for documentation. If the descriptor doesn't create a default, throws
* an UnsupportedOperationException.
*/
public String typeDefaultDocString(ArgumentSource source) {
throw new UnsupportedOperationException();
}
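    // Editorial sketch (not part of this change): a descriptor that supplies a type default is
    // expected to override createsTypeDefault(), createTypeDefault() and typeDefaultDocString()
    // together, as the RodBinding descriptor below does. The class name and the default value here
    // are hypothetical and only illustrate that contract.
    //
    //     class DefaultingStringTypeDescriptor extends ArgumentTypeDescriptor {
    //         @Override
    //         public boolean supports( Class type ) { return String.class.equals(type); }
    //
    //         @Override
    //         public boolean createsTypeDefault(ArgumentSource source) { return !source.isRequired(); }
    //
    //         @Override
    //         public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
    //             return "";                      // value injected when the argument is omitted
    //         }
    //
    //         @Override
    //         public String typeDefaultDocString(ArgumentSource source) {
    //             return "empty string";          // what the generated docs print for the default
    //         }
    //
    //         @Override
    //         public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
    //             return getArgumentValue(createDefaultArgumentDefinition(source), matches);
    //         }
    //     }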
/** /**
* Generates a default for the given type. * Generates a default for the given type.
*
* @param parsingEngine the parsing engine used to validate this argument type descriptor. * @param parsingEngine the parsing engine used to validate this argument type descriptor.
* @param source Source of the command-line argument. * @param source Source of the command-line argument.
* @param type Type of value to create, in case the command-line argument system wants influence. * @param type Type of value to create, in case the command-line argument system wants influence.
* @return A default value for the given type. * @return A default value for the given type.
*/ */
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); } public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) { throw new UnsupportedOperationException("Unable to create default for type " + getClass()); }
/** /**
* Given the given argument source and attributes, synthesize argument definitions for command-line arguments. * Given the given argument source and attributes, synthesize argument definitions for command-line arguments.
@ -109,7 +124,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parsed object. * @return The parsed object.
*/ */
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, ArgumentMatches matches) {
return parse(parsingEngine, source, source.field.getType(), matches); return parse(parsingEngine, source, source.field.getGenericType(), matches);
} }
/** /**
@ -131,18 +146,18 @@ public abstract class ArgumentTypeDescriptor {
protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) { protected ArgumentDefinition createDefaultArgumentDefinition( ArgumentSource source ) {
Annotation argumentAnnotation = getArgumentAnnotation(source); Annotation argumentAnnotation = getArgumentAnnotation(source);
return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation), return new ArgumentDefinition( ArgumentIOType.getIOType(argumentAnnotation),
source.field.getType(), source.field.getType(),
ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()), ArgumentDefinition.getFullName(argumentAnnotation, source.field.getName()),
ArgumentDefinition.getShortName(argumentAnnotation), ArgumentDefinition.getShortName(argumentAnnotation),
ArgumentDefinition.getDoc(argumentAnnotation), ArgumentDefinition.getDoc(argumentAnnotation),
source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(), source.isRequired() && !createsTypeDefault(source) && !source.isFlag() && !source.isDeprecated(),
source.isFlag(), source.isFlag(),
source.isMultiValued(), source.isMultiValued(),
source.isHidden(), source.isHidden(),
getCollectionComponentType(source.field), makeRawTypeIfNecessary(getCollectionComponentType(source.field)),
ArgumentDefinition.getExclusiveOf(argumentAnnotation), ArgumentDefinition.getExclusiveOf(argumentAnnotation),
ArgumentDefinition.getValidationRegex(argumentAnnotation), ArgumentDefinition.getValidationRegex(argumentAnnotation),
getValidOptions(source) ); getValidOptions(source) );
} }
/** /**
@ -151,7 +166,7 @@ public abstract class ArgumentTypeDescriptor {
* @return The parameterized component type, or String.class if the parameterized type could not be found. * @return The parameterized component type, or String.class if the parameterized type could not be found.
* @throws IllegalArgumentException If more than one parameterized type is found on the field. * @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/ */
protected Class getCollectionComponentType( Field field ) { protected Type getCollectionComponentType( Field field ) {
return null; return null;
} }
@ -162,7 +177,7 @@ public abstract class ArgumentTypeDescriptor {
* @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection. * @param matches The argument matches for the argument source, or the individual argument match for a scalar if this is being called to help parse a collection.
* @return The individual parsed object matching the argument match with Class type. * @return The individual parsed object matching the argument match with Class type.
*/ */
public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ); public abstract Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches );
/** /**
* If the argument source only accepts a small set of options, populate the returned list with * If the argument source only accepts a small set of options, populate the returned list with
@ -273,6 +288,123 @@ public abstract class ArgumentTypeDescriptor {
public static boolean isArgumentHidden(Field field) { public static boolean isArgumentHidden(Field field) {
return field.isAnnotationPresent(Hidden.class); return field.isAnnotationPresent(Hidden.class);
} }
public Class makeRawTypeIfNecessary(Type t) {
if ( t == null )
return null;
else if ( t instanceof ParameterizedType )
return (Class)((ParameterizedType) t).getRawType();
else if ( t instanceof Class ) {
return (Class)t;
} else {
throw new IllegalArgumentException("Unable to determine Class-derived component type of field: " + t);
}
}
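    // Editorial sketch of what the helper above returns (example inputs only, not part of this change):
    //     makeRawTypeIfNecessary(String.class)                        -> String.class   (already a raw Class)
    //     makeRawTypeIfNecessary(type of List<RodBinding<Feature>>)   -> List.class     (raw type of the ParameterizedType)
    //     makeRawTypeIfNecessary(null)                                -> null
    //     anything else (e.g. a wildcard or type variable)            -> IllegalArgumentException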
}
/**
* Parser for RodBinding objects
*/
class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want RodBinding class objects
* @param type The type to check.
* @return true if the provided class is a RodBinding.class
*/
@Override
public boolean supports( Class type ) {
return isRodBinding(type);
}
public static boolean isRodBinding( Class type ) {
return RodBinding.class.isAssignableFrom(type);
}
@Override
public boolean createsTypeDefault(ArgumentSource source) { return ! source.isRequired(); }
@Override
public Object createTypeDefault(ParsingEngine parsingEngine, ArgumentSource source, Type type) {
Class parameterType = JVMUtils.getParameterizedTypeClass(type);
return RodBinding.makeUnbound((Class<? extends Feature>)parameterType);
}
@Override
public String typeDefaultDocString(ArgumentSource source) {
return "none";
}
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
try {
String name = defaultDefinition.fullName;
String tribbleType = null;
Tags tags = getArgumentTags(matches);
// must have one or two tag values here
if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only suport -X:type and -X:name,type argument styles",
value, source.field.getName()));
}
if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style
name = tags.getPositionalTags().get(0);
tribbleType = tags.getPositionalTags().get(1);
} else {
// case with 0 or 1 positional tags
FeatureManager manager = new FeatureManager();
// -X:type style: treat the single tag as a type if it names a known feature type, otherwise as a name
String tag1 = tags.getPositionalTags().size() == 1 ? tags.getPositionalTags().get(0) : null;
if ( tag1 != null ) {
if ( manager.getByName(tag1) != null ) // this is a type
tribbleType = tag1;
else
name = tag1;
}
if ( tribbleType == null ) {
// try to determine the file type dynamically
File file = new File(value);
if ( file.canRead() && file.isFile() ) {
FeatureManager.FeatureDescriptor featureDescriptor = manager.getByFiletype(file);
if ( featureDescriptor != null ) {
tribbleType = featureDescriptor.getName();
logger.info("Dynamically determined type of " + file + " to be " + tribbleType);
}
}
if ( tribbleType == null )
if ( ! file.canRead() || ! file.isFile() ) {
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
} else {
throw new UserException.CommandLineException(
String.format("No tribble type was provided on the command line and the type of the file could not be determined dynamically. " +
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
return result;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(
String.format("Failed to parse value %s for argument %s.",
value, source.field.getName()));
} catch (Exception e) {
throw new UserException.CommandLineException(
String.format("Failed to parse value %s for argument %s. Message: %s",
value, source.field.getName(), e.getMessage()));
}
}
} }
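// Editorial sketch of the command-line forms accepted by the parse() logic above. The walker
// argument name "-eval" and the file names are hypothetical; only the -X:type and -X:name,type
// tag styles and the dynamic type detection come from the code itself.
//
//     -eval:vcf calls.vcf          name defaults to the argument name, tribbleType = "vcf"
//     -eval:mine,vcf calls.vcf     name = "mine", tribbleType = "vcf"
//     -eval calls.vcf              no tag: the tribble type is detected from the readable file;
//                                  otherwise a UserException asks for an explicit type tag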
/** /**
@ -282,9 +414,10 @@ public abstract class ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public boolean supports( Class type ) { public boolean supports( Class type ) {
if( type.isPrimitive() ) return true; if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
if( type.isEnum() ) return true; if ( type.isPrimitive() ) return true;
if( primitiveToWrapperMap.containsValue(type) ) return true; if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true;
try { try {
type.getConstructor(String.class); type.getConstructor(String.class);
@ -298,7 +431,8 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type fulltype, ArgumentMatches matches) {
Class type = makeRawTypeIfNecessary(fulltype);
if (source.isFlag()) if (source.isFlag())
return true; return true;
@ -339,7 +473,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
throw e; throw e;
} catch (InvocationTargetException e) { } catch (InvocationTargetException e) {
throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)", throw new UserException.CommandLineException(String.format("Failed to parse value %s for argument %s. This is most commonly caused by providing an incorrect data type (e.g. a double when an int is required)",
value, source.field.getName())); value, source.field.getName()));
} catch (Exception e) { } catch (Exception e) {
throw new DynamicClassResolutionException(String.class, e); throw new DynamicClassResolutionException(String.class, e);
} }
@ -351,7 +485,7 @@ class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return result; return result;
} }
/** /**
* A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer * A mapping of the primitive types to their associated wrapper classes. Is there really no way to infer
@ -382,10 +516,10 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Class type, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine,ArgumentSource source, Type fulltype, ArgumentMatches matches) {
Class componentType; Class type = makeRawTypeIfNecessary(fulltype);
Type componentType;
Object result; Object result;
Tags tags;
if( Collection.class.isAssignableFrom(type) ) { if( Collection.class.isAssignableFrom(type) ) {
@ -399,7 +533,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
componentType = getCollectionComponentType( source.field ); componentType = getCollectionComponentType( source.field );
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
Collection collection; Collection collection;
try { try {
@ -428,7 +562,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
else if( type.isArray() ) { else if( type.isArray() ) {
componentType = type.getComponentType(); componentType = type.getComponentType();
ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(componentType); ArgumentTypeDescriptor componentArgumentParser = parsingEngine.selectBestTypeDescriptor(makeRawTypeIfNecessary(componentType));
// Assemble a collection of individual values used in this computation. // Assemble a collection of individual values used in this computation.
Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>(); Collection<ArgumentMatch> values = new ArrayList<ArgumentMatch>();
@ -436,7 +570,7 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
for( ArgumentMatch value: match ) for( ArgumentMatch value: match )
values.add(value); values.add(value);
result = Array.newInstance(componentType,values.size()); result = Array.newInstance(makeRawTypeIfNecessary(componentType),values.size());
int i = 0; int i = 0;
for( ArgumentMatch value: values ) { for( ArgumentMatch value: values ) {
@ -459,16 +593,16 @@ class CompoundArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field. * @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/ */
@Override @Override
protected Class getCollectionComponentType( Field field ) { protected Type getCollectionComponentType( Field field ) {
// If this is a parameterized collection, find the contained type. Blow up if more than one type exists. // If this is a parameterized collection, find the contained type. Blow up if more than one type exists.
if( field.getGenericType() instanceof ParameterizedType) { if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();
if( parameterizedType.getActualTypeArguments().length > 1 ) if( parameterizedType.getActualTypeArguments().length > 1 )
throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString()); throw new IllegalArgumentException("Unable to determine collection type of field: " + field.toString());
return (Class)parameterizedType.getActualTypeArguments()[0]; return parameterizedType.getActualTypeArguments()[0];
} }
else else
return String.class; return String.class;
} }
} }
@ -510,12 +644,12 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(multiplexer == null || multiplexedIds == null) if(multiplexer == null || multiplexedIds == null)
throw new ReviewedStingException("No multiplexed ids available"); throw new ReviewedStingException("No multiplexed ids available");
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>(); Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
Class componentType = getCollectionComponentType(source.field); Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType); ArgumentTypeDescriptor componentTypeDescriptor = parsingEngine.selectBestTypeDescriptor(componentType);
for(Object id: multiplexedIds) { for(Object id: multiplexedIds) {
@ -527,15 +661,19 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
return multiplexedMapping; return multiplexedMapping;
} }
@Override
public String typeDefaultDocString(ArgumentSource source) {
return "None";
}
@Override @Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
if(multiplexedIds == null) if(multiplexedIds == null)
throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first."); throw new ReviewedStingException("Cannot directly parse a MultiplexArgumentTypeDescriptor; must create a derivative type descriptor first.");
Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>(); Map<Object,Object> multiplexedMapping = new HashMap<Object,Object>();
Class componentType = getCollectionComponentType(source.field); Class componentType = makeRawTypeIfNecessary(getCollectionComponentType(source.field));
for(Object id: multiplexedIds) { for(Object id: multiplexedIds) {
@ -606,7 +744,7 @@ class MultiplexArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @throws IllegalArgumentException If more than one parameterized type is found on the field. * @throws IllegalArgumentException If more than one parameterized type is found on the field.
*/ */
@Override @Override
protected Class getCollectionComponentType( Field field ) { protected Type getCollectionComponentType( Field field ) {
// Multiplex arguments must resolve to maps from which the clp should extract the second type. // Multiplex arguments must resolve to maps from which the clp should extract the second type.
if( field.getGenericType() instanceof ParameterizedType) { if( field.getGenericType() instanceof ParameterizedType) {
ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType(); ParameterizedType parameterizedType = (ParameterizedType)field.getGenericType();

View File

@ -43,7 +43,7 @@ import java.util.Locale;
public abstract class CommandLineProgram { public abstract class CommandLineProgram {
/** The command-line program and the arguments it returned. */ /** The command-line program and the arguments it returned. */
protected ParsingEngine parser = null; public ParsingEngine parser = null;
/** the default log level */ /** the default log level */
@Argument(fullName = "logging_level", @Argument(fullName = "logging_level",
@ -144,6 +144,11 @@ public abstract class CommandLineProgram {
public static int result = -1; public static int result = -1;
@SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) throws Exception {
start(clp, args, false);
}
/** /**
* This function is called to start processing the command line, and kick * This function is called to start processing the command line, and kick
* off the execute message of the program. * off the execute message of the program.
@ -153,7 +158,7 @@ public abstract class CommandLineProgram {
* @throws Exception when an exception occurs * @throws Exception when an exception occurs
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
public static void start(CommandLineProgram clp, String[] args) throws Exception { public static void start(CommandLineProgram clp, String[] args, boolean dryRun) throws Exception {
try { try {
// setup our log layout // setup our log layout
@ -180,8 +185,9 @@ public abstract class CommandLineProgram {
// - InvalidArgument in case these arguments are specified by plugins. // - InvalidArgument in case these arguments are specified by plugins.
// - MissingRequiredArgument in case the user requested help. Handle that later, once we've // - MissingRequiredArgument in case the user requested help. Handle that later, once we've
// determined the full complement of arguments. // determined the full complement of arguments.
parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument, if ( ! dryRun )
ParsingEngine.ValidationType.InvalidArgument)); parser.validate(EnumSet.of(ParsingEngine.ValidationType.MissingRequiredArgument,
ParsingEngine.ValidationType.InvalidArgument));
parser.loadArgumentsIntoObject(clp); parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line. // Initialize the logger using the loaded command line.
@ -195,36 +201,40 @@ public abstract class CommandLineProgram {
if (isHelpPresent(parser)) if (isHelpPresent(parser))
printHelpAndExit(clp, parser); printHelpAndExit(clp, parser);
parser.validate(); if ( ! dryRun ) parser.validate();
} else { } else {
parser.parse(args); parser.parse(args);
if (isHelpPresent(parser)) if ( ! dryRun ) {
printHelpAndExit(clp, parser); if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
parser.validate(); parser.validate();
}
parser.loadArgumentsIntoObject(clp); parser.loadArgumentsIntoObject(clp);
// Initialize the logger using the loaded command line. // Initialize the logger using the loaded command line.
clp.setupLoggerLevel(layout); clp.setupLoggerLevel(layout);
} }
// if they specify a log location, output our data there if ( ! dryRun ) {
if (clp.toFile != null) { // if they specify a log location, output our data there
FileAppender appender; if (clp.toFile != null) {
try { FileAppender appender;
appender = new FileAppender(layout, clp.toFile, false); try {
logger.addAppender(appender); appender = new FileAppender(layout, clp.toFile, false);
} catch (IOException e) { logger.addAppender(appender);
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists"); } catch (IOException e) {
throw new RuntimeException("Unable to re-route log output to " + clp.toFile + " make sure the destination exists");
}
} }
// regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
// call the execute
CommandLineProgram.result = clp.execute();
} }
// regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
// call the execute
CommandLineProgram.result = clp.execute();
} }
catch (ArgumentException e) { catch (ArgumentException e) {
clp.parser.printHelp(clp.getApplicationDetails()); clp.parser.printHelp(clp.getApplicationDetails());

View File

@ -55,7 +55,7 @@ public @interface Output {
* --help argument is specified. * --help argument is specified.
* @return Doc string associated with this command-line argument. * @return Doc string associated with this command-line argument.
*/ */
String doc() default "An output file presented to the walker. Will overwrite contents if file exists."; String doc() default "An output file created by the walker. Will overwrite contents if file exists";
/** /**
* Is this argument required. If true, the command-line argument system will * Is this argument required. If true, the command-line argument system will

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -41,11 +42,16 @@ import java.util.*;
* A parser for Sting command-line arguments. * A parser for Sting command-line arguments.
*/ */
public class ParsingEngine { public class ParsingEngine {
/**
* The loaded argument sources along with their back definitions.
*/
private Map<ArgumentDefinition,ArgumentSource> argumentSourcesByDefinition = new HashMap<ArgumentDefinition,ArgumentSource>();
/** /**
* A list of defined arguments against which command lines are matched. * A list of defined arguments against which command lines are matched.
* Package protected for testing access. * Package protected for testing access.
*/ */
ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions(); public ArgumentDefinitions argumentDefinitions = new ArgumentDefinitions();
/** /**
* A list of matches from defined arguments to command-line text. * A list of matches from defined arguments to command-line text.
@ -59,11 +65,17 @@ public class ParsingEngine {
*/ */
private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>(); private List<ParsingMethod> parsingMethods = new ArrayList<ParsingMethod>();
/**
* All of the RodBinding objects we've seen while parsing
*/
private List<RodBinding> rodBindings = new ArrayList<RodBinding>();
/** /**
* Class reference to the different types of descriptors that the create method can create. * Class reference to the different types of descriptors that the create method can create.
* The type of set used must be ordered (but not necessarily sorted). * The type of set used must be ordered (but not necessarily sorted).
*/ */
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(), private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) ); new MultiplexArgumentTypeDescriptor()) );
@ -80,6 +92,7 @@ public class ParsingEngine {
protected static Logger logger = Logger.getLogger(ParsingEngine.class); protected static Logger logger = Logger.getLogger(ParsingEngine.class);
public ParsingEngine( CommandLineProgram clp ) { public ParsingEngine( CommandLineProgram clp ) {
RodBinding.resetNameCounter();
parsingMethods.add( ParsingMethod.FullNameParsingMethod ); parsingMethods.add( ParsingMethod.FullNameParsingMethod );
parsingMethods.add( ParsingMethod.ShortNameParsingMethod ); parsingMethods.add( ParsingMethod.ShortNameParsingMethod );
@ -107,8 +120,13 @@ public class ParsingEngine {
*/ */
public void addArgumentSource( String sourceName, Class sourceClass ) { public void addArgumentSource( String sourceName, Class sourceClass ) {
List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>(); List<ArgumentDefinition> argumentsFromSource = new ArrayList<ArgumentDefinition>();
for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) for( ArgumentSource argumentSource: extractArgumentSources(sourceClass) ) {
argumentsFromSource.addAll( argumentSource.createArgumentDefinitions() ); List<ArgumentDefinition> argumentDefinitions = argumentSource.createArgumentDefinitions();
for(ArgumentDefinition argumentDefinition: argumentDefinitions) {
argumentSourcesByDefinition.put(argumentDefinition,argumentSource);
argumentsFromSource.add( argumentDefinition );
}
}
argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) ); argumentDefinitions.add( new ArgumentDefinitionGroup(sourceName, argumentsFromSource) );
} }
@ -199,16 +217,25 @@ public class ParsingEngine {
throw new InvalidArgumentException( invalidArguments ); throw new InvalidArgumentException( invalidArguments );
} }
// Find invalid argument values (arguments that fail the regexp test. // Find invalid argument values -- invalid arguments are either completely missing or fail the specified 'validation' regular expression.
if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) { if( !skipValidationOf.contains(ValidationType.InvalidArgumentValue) ) {
Collection<ArgumentDefinition> verifiableArguments = Collection<ArgumentDefinition> verifiableArguments =
argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher ); argumentDefinitions.findArgumentDefinitions( null, ArgumentDefinitions.VerifiableDefinitionMatcher );
Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>(); Collection<Pair<ArgumentDefinition,String>> invalidValues = new ArrayList<Pair<ArgumentDefinition,String>>();
for( ArgumentDefinition verifiableArgument: verifiableArguments ) { for( ArgumentDefinition verifiableArgument: verifiableArguments ) {
ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument ); ArgumentMatches verifiableMatches = argumentMatches.findMatches( verifiableArgument );
// Check to see whether an argument value was specified. Argument values must be provided
// when the argument name is specified and the argument is not a flag type.
for(ArgumentMatch verifiableMatch: verifiableMatches) {
ArgumentSource argumentSource = argumentSourcesByDefinition.get(verifiableArgument);
if(verifiableMatch.values().size() == 0 && !verifiableArgument.isFlag && argumentSource.createsTypeDefault())
invalidValues.add(new Pair<ArgumentDefinition,String>(verifiableArgument,null));
}
// Ensure that the field contents meet the validation criteria specified by the regular expression.
for( ArgumentMatch verifiableMatch: verifiableMatches ) { for( ArgumentMatch verifiableMatch: verifiableMatches ) {
for( String value: verifiableMatch.values() ) { for( String value: verifiableMatch.values() ) {
if( !value.matches(verifiableArgument.validation) ) if( verifiableArgument.validation != null && !value.matches(verifiableArgument.validation) )
invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) ); invalidValues.add( new Pair<ArgumentDefinition,String>(verifiableArgument, value) );
} }
} }
@ -304,7 +331,17 @@ public class ParsingEngine {
if(!tags.containsKey(key)) if(!tags.containsKey(key))
return new Tags(); return new Tags();
return tags.get(key); return tags.get(key);
} }
/**
* Add a RodBinding type argument to this parser. Called during parsing to allow
* us to track all of the RodBindings discovered in the command line.
* @param rodBinding the rodbinding to add. Must not be added twice
*/
@Requires("rodBinding != null")
public void addRodBinding(final RodBinding rodBinding) {
rodBindings.add(rodBinding);
}
/** /**
* Notify the user that a deprecated command-line argument has been used. * Notify the user that a deprecated command-line argument has been used.
@ -327,7 +364,7 @@ public class ParsingEngine {
*/ */
private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) { private void loadValueIntoObject( ArgumentSource source, Object instance, ArgumentMatches argumentMatches ) {
// Nothing to load // Nothing to load
if( argumentMatches.size() == 0 && !(source.createsTypeDefault() && source.isRequired())) if( argumentMatches.size() == 0 && ! source.createsTypeDefault() )
return; return;
// Target instance into which to inject the value. // Target instance into which to inject the value.
@ -344,6 +381,10 @@ public class ParsingEngine {
} }
} }
public Collection<RodBinding> getRodBindings() {
return Collections.unmodifiableCollection(rodBindings);
}
/** /**
* Gets a collection of the container instances of the given type stored within the given target. * Gets a collection of the container instances of the given type stored within the given target.
* @param source Argument source. * @param source Argument source.
@ -390,7 +431,6 @@ public class ParsingEngine {
return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type); return ArgumentTypeDescriptor.selectBest(argumentTypeDescriptors,type);
} }
private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) { private List<ArgumentSource> extractArgumentSources(Class sourceClass, Field[] parentFields) {
// now simply call into the truly general routine extract argument bindings but with a null // now simply call into the truly general routine extract argument bindings but with a null
// object so bindings aren't computed // object so bindings aren't computed
@ -515,10 +555,14 @@ class InvalidArgumentValueException extends ArgumentException {
private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) { private static String formatArguments( Collection<Pair<ArgumentDefinition,String>> invalidArgumentValues ) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) { for( Pair<ArgumentDefinition,String> invalidValue: invalidArgumentValues ) {
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)", if(invalidValue.getSecond() == null)
invalidValue.first.fullName, sb.append( String.format("%nArgument '--%s' requires a value but none was provided",
invalidValue.second, invalidValue.first.fullName) );
invalidValue.first.validation) ); else
sb.append( String.format("%nArgument '--%s' has value of incorrect format: %s (should match %s)",
invalidValue.first.fullName,
invalidValue.second,
invalidValue.first.validation) );
} }
return sb.toString(); return sb.toString();
} }

View File

@ -0,0 +1,187 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import java.util.*;
/**
* A RodBinding representing a walker argument that gets bound to a ROD track.
*
* The RodBinding<T> is a formal GATK argument that bridges between a walker and
* the RefMetaDataTracker to obtain data about this rod track at runtime. The RodBinding
* is explicitly typed with type of the Tribble.Feature expected to be produced by this
* argument. The GATK Engine takes care of initializing the binding and connecting it
* to the RMD system.
*
* It is recommended that optional RodBindings be initialized to the value returned
* by the static method makeUnbound().
*
* Note that this class is immutable.
*/
public final class RodBinding<T extends Feature> {
protected final static String UNBOUND_VARIABLE_NAME = "";
protected final static String UNBOUND_SOURCE = "UNBOUND";
protected final static String UNBOUND_TRIBBLE_TYPE = "";
/**
* Create an unbound RodBinding of the given type. This is the correct programming
* style for an optional RodBinding<T>
*
* For an @Input() field:
* RodBinding<T> x = RodBinding.makeUnbound(T.class)
*
* The unbound binding is guaranteed to never match any binding. It uniquely
* returns false to isBound().
*
* @param type the Class type produced by this unbound object
* @param <T> any class extending Tribble Feature
* @return the UNBOUND RodBinding producing objects of type T
*/
@Requires("type != null")
protected final static <T extends Feature> RodBinding<T> makeUnbound(Class<T> type) {
return new RodBinding<T>(type);
}
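    // Editorial sketch (hypothetical walker field, not part of this change): when an optional
    // RodBinding argument is omitted on the command line, RodBindingArgumentTypeDescriptor's
    // createTypeDefault() injects the unbound value built by makeUnbound(), so walker code can
    // test the binding before using it:
    //
    //     @Input(fullName = "mask", doc = "Optional mask track", required = false)
    //     public RodBinding<Feature> mask;           // receives the UNBOUND binding when -mask is absent
    //
    //     if ( mask.isBound() ) { /* look the track up via the RefMetaDataTracker */ }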
/** The name of this binding. Often the name of the field itself, but can be overridden on cmdline */
final private String name;
/** where the data for this ROD is coming from. A file or special value if coming from stdin */
final private String source;
/** the string name of the tribble type, such as vcf, bed, etc. */
final private String tribbleType;
/** The command line tags associated with this RodBinding */
final private Tags tags;
/** The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble */
final private Class<T> type;
/** True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments */
final private boolean bound;
/**
* The name counter. This is how we create unique names for collections of RodBindings
* on the command line. If you provide the GATK with -X file1 and -X file2 for a
* RodBinding argument declared as List<RodBinding<T>>, then the bindings automatically
* receive the names X and X2.
*/
final private static Map<String, Integer> nameCounter = new HashMap<String, Integer>();
/** for UnitTests */
final public static void resetNameCounter() {
nameCounter.clear();
}
@Requires("rawName != null")
@Ensures("result != null")
final private static synchronized String countedVariableName(final String rawName) {
Integer count = nameCounter.get(rawName);
if ( count == null ) {
nameCounter.put(rawName, 1);
return rawName;
} else {
nameCounter.put(rawName, count + 1);
return rawName + (count + 1);
}
}
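    // Editorial sketch of the numbering behaviour above (the raw name "comp" is an example only):
    //     countedVariableName("comp")  ->  "comp"     first use keeps the raw name
    //     countedVariableName("comp")  ->  "comp2"    second use appends the counter
    //     countedVariableName("comp")  ->  "comp3"    and so on, one counter per raw name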
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
public RodBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
this.type = type;
this.name = countedVariableName(rawName);
this.source = source;
this.tribbleType = tribbleType;
this.tags = tags;
this.bound = true;
}
/**
* Make an unbound RodBinding<T>. Only available for creating the globally unique UNBOUND object
* @param type class this unbound RodBinding creates
*/
@Requires({"type != null"})
private RodBinding(Class<T> type) {
this.type = type;
this.name = UNBOUND_VARIABLE_NAME; // special value can never be found in RefMetaDataTracker
this.source = UNBOUND_SOURCE;
this.tribbleType = UNBOUND_TRIBBLE_TYPE;
this.tags = new Tags();
this.bound = false;
}
/**
* @return True for all RodBindings except the special UNBOUND binding, which is the default for optional arguments
*/
final public boolean isBound() {
return bound;
}
/**
* @return The name of this binding. Often the name of the field itself, but can be overridden on cmdline
*/
@Ensures({"result != null"})
final public String getName() {
return name;
}
/**
* @return The Java class expected for this RodBinding. Must correspond to the type emitted by Tribble
*/
@Ensures({"result != null"})
final public Class<T> getType() {
return type;
}
/**
* @return where the data for this ROD is coming from. A file or special value if coming from stdin
*/
@Ensures({"result != null"})
final public String getSource() {
return source;
}
/**
* @return The command line tags associated with this RodBinding. Will include the tags used to
* determine the name and type of this RodBinding
*/
@Ensures({"result != null"})
final public Tags getTags() {
return tags;
}
/**
* @return the string name of the tribble type, such as vcf, bed, etc.
*/
@Ensures({"result != null"})
final public String getTribbleType() {
return tribbleType;
}
@Override
public String toString() {
return String.format("(RodBinding name=%s source=%s)", getName(), getSource());
}
}

View File

@ -25,21 +25,20 @@
package org.broadinstitute.sting.gatk; package org.broadinstitute.sting.gatk;
import org.broadinstitute.sting.commandline.ArgumentTypeDescriptor; import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.OutputStreamArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileReaderArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.SAMFileWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor; import org.broadinstitute.sting.gatk.io.stubs.VCFWriterArgumentTypeDescriptor;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
import org.broadinstitute.sting.utils.text.ListFileUtils; import org.broadinstitute.sting.utils.text.ListFileUtils;
import java.util.ArrayList; import java.util.*;
import java.util.Arrays;
import java.util.Collection;
/** /**
* @author aaron * @author aaron
@ -64,6 +63,8 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/ */
private final Collection<Object> argumentSources = new ArrayList<Object>(); private final Collection<Object> argumentSources = new ArrayList<Object>();
protected static Logger logger = Logger.getLogger(CommandLineExecutable.class);
/** /**
* this is the function that the inheriting class can expect to have called * this is the function that the inheriting class can expect to have called
* when the command line system has initialized. * when the command line system has initialized.
@ -81,7 +82,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
// File lists can require a bit of additional expansion. Set these explicitly by the engine. // File lists can require a bit of additional expansion. Set these explicitly by the engine.
engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser)); engine.setSAMFileIDs(ListFileUtils.unpackBAMFileList(getArgumentCollection().samFiles,parser));
engine.setReferenceMetaDataFiles(ListFileUtils.unpackRODBindings(getArgumentCollection().RODBindings,getArgumentCollection().DBSNPFile,parser));
engine.setWalker(walker); engine.setWalker(walker);
walker.setToolkit(engine); walker.setToolkit(engine);
@ -96,6 +96,24 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
loadArgumentsIntoObject(walker); loadArgumentsIntoObject(walker);
argumentSources.add(walker); argumentSources.add(walker);
Collection<RMDTriplet> rodBindings = ListFileUtils.unpackRODBindings(parser.getRodBindings(), parser);
// todo: remove me when the old style system is removed
if ( getArgumentCollection().RODBindings.size() > 0 ) {
logger.warn("################################################################################");
logger.warn("################################################################################");
logger.warn("Deprecated -B rod binding syntax detected. This syntax has been eliminated in GATK 1.2.");
logger.warn("Please use arguments defined by each specific walker instead.");
for ( String oldStyleRodBinding : getArgumentCollection().RODBindings ) {
logger.warn(" -B rod binding with value " + oldStyleRodBinding + " tags: " + parser.getTags(oldStyleRodBinding).getPositionalTags());
}
logger.warn("################################################################################");
logger.warn("################################################################################");
System.exit(1);
}
engine.setReferenceMetaDataFiles(rodBindings);
for (ReadFilter filter: filters) { for (ReadFilter filter: filters) {
loadArgumentsIntoObject(filter); loadArgumentsIntoObject(filter);
argumentSources.add(filter); argumentSources.add(filter);
@ -112,6 +130,7 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
return 0; return 0;
} }
/** /**
* Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled. * Generate the GATK run report for this walker using the current GATKEngine, if -et is enabled.
* This report will be written to either STDOUT or to the run repository, depending on the options * This report will be written to either STDOUT or to the run repository, depending on the options
@ -142,7 +161,6 @@ public abstract class CommandLineExecutable extends CommandLineProgram {
*/ */
protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() { protected Collection<ArgumentTypeDescriptor> getArgumentTypeDescriptors() {
return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources), return Arrays.asList( new VCFWriterArgumentTypeDescriptor(engine,System.out,argumentSources),
new SAMFileReaderArgumentTypeDescriptor(engine),
new SAMFileWriterArgumentTypeDescriptor(engine,System.out), new SAMFileWriterArgumentTypeDescriptor(engine,System.out),
new OutputStreamArgumentTypeDescriptor(engine,System.out) ); new OutputStreamArgumentTypeDescriptor(engine,System.out) );
} }

View File

@ -30,25 +30,27 @@ import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection; import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.walkers.Attribution; import org.broadinstitute.sting.gatk.walkers.Attribution;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.*;
import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*; import java.util.*;
/** /**
* @author aaron * The GATK engine itself. Manages map/reduce data access and runs walkers.
* @version 1.0 *
* @date May 8, 2009
* <p/>
* Class CommandLineGATK
* <p/>
* We run command line GATK programs using this class. It gets the command line args, parses them, and hands the * We run command line GATK programs using this class. It gets the command line args, parses them, and hands the
* gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here, * gatk all the parsed out information. Pretty much anything dealing with the underlying system should go here,
* the gatk engine should deal with any data related information. * the gatk engine should deal with any data related information.
*/ */
@DocumentedGATKFeature(
groupName = "GATK Engine",
summary = "Features and arguments for the GATK engine itself, available to all walkers.",
extraDocs = { UserException.class })
public class CommandLineGATK extends CommandLineExecutable { public class CommandLineGATK extends CommandLineExecutable {
@Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run") @Argument(fullName = "analysis_type", shortName = "T", doc = "Type of analysis to run")
private String analysisName = null; private String analysisName = null;
@ -173,12 +175,12 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder(); StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp); Formatter formatter = new Formatter(additionalHelp);
formatter.format("Description:%n"); formatter.format("Available Reference Ordered Data types:%n");
formatter.format(new FeatureManager().userFriendlyListOfAvailableFeatures());
formatter.format("%n");
WalkerManager walkerManager = engine.getWalkerManager(); formatter.format("For a full description of this walker, see its GATKdocs at:%n");
String walkerHelpText = walkerManager.getWalkerDescriptionText(walkerType); formatter.format("%s%n", GATKDocUtils.helpLinksToGATKDocs(walkerType));
printDescriptorLine(formatter,WALKER_INDENT,"",WALKER_INDENT,FIELD_SEPARATOR,walkerHelpText,TextFormattingUtils.DEFAULT_LINE_WIDTH);
return additionalHelp.toString(); return additionalHelp.toString();
} }
@ -192,8 +194,6 @@ public class CommandLineGATK extends CommandLineExecutable {
StringBuilder additionalHelp = new StringBuilder(); StringBuilder additionalHelp = new StringBuilder();
Formatter formatter = new Formatter(additionalHelp); Formatter formatter = new Formatter(additionalHelp);
formatter.format("Available analyses:%n");
// Get the list of walker names from the walker manager. // Get the list of walker names from the walker manager.
WalkerManager walkerManager = engine.getWalkerManager(); WalkerManager walkerManager = engine.getWalkerManager();

View File

@ -43,7 +43,7 @@ import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator; import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
@ -370,33 +370,6 @@ public class GenomeAnalysisEngine {
throw new ArgumentException("Walker does not allow a reference but one was provided."); throw new ArgumentException("Walker does not allow a reference but one was provided.");
} }
/**
* Verifies that all required reference-ordered data has been supplied, and any reference-ordered data that was not
* 'allowed' is still present.
*
* @param rods Reference-ordered data to load.
*/
protected void validateSuppliedReferenceOrderedData(List<ReferenceOrderedDataSource> rods) {
// Check to make sure that all required metadata is present.
List<RMD> allRequired = WalkerManager.getRequiredMetaData(walker);
for (RMD required : allRequired) {
boolean found = false;
for (ReferenceOrderedDataSource rod : rods) {
if (rod.matchesNameAndRecordType(required.name(), required.type()))
found = true;
}
if (!found)
throw new ArgumentException(String.format("Walker requires reference metadata to be supplied named '%s' of type '%s', but this metadata was not provided. " +
"Please supply the specified metadata file.", required.name(), required.type().getSimpleName()));
}
// Check to see that no forbidden rods are present.
for (ReferenceOrderedDataSource rod : rods) {
if (!WalkerManager.isAllowed(walker, rod))
throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s", walker.getClass(), rod.getName()));
}
}
protected void validateSuppliedIntervals() { protected void validateSuppliedIntervals() {
// Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped. // Only read walkers support '-L unmapped' intervals. Trap and validate any other instances of -L unmapped.
if(!(walker instanceof ReadWalker)) { if(!(walker instanceof ReadWalker)) {
@ -716,8 +689,6 @@ public class GenomeAnalysisEngine {
validateSuppliedReads(); validateSuppliedReads();
readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference()); readsDataSource = createReadsDataSource(argCollection,genomeLocParser,referenceDataSource.getReference());
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
for (ReadFilter filter : filters) for (ReadFilter filter : filters)
filter.initialize(this); filter.initialize(this);
@ -926,9 +897,6 @@ public class GenomeAnalysisEngine {
GenomeLocParser genomeLocParser, GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType); RMDTrackBuilder builder = new RMDTrackBuilder(sequenceDictionary,genomeLocParser,validationExclusionType);
// try and make the tracks given their requests
// create of live instances of the tracks
List<RMDTrack> tracks = new ArrayList<RMDTrack>();
List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>(); List<ReferenceOrderedDataSource> dataSources = new ArrayList<ReferenceOrderedDataSource>();
for (RMDTriplet fileDescriptor : referenceMetaDataFiles) for (RMDTriplet fileDescriptor : referenceMetaDataFiles)
@ -939,7 +907,6 @@ public class GenomeAnalysisEngine {
flashbackData())); flashbackData()));
// validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match. // validation: check to make sure everything the walker needs is present, and that all sequence dictionaries match.
validateSuppliedReferenceOrderedData(dataSources);
validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder); validateSourcesAgainstReference(readsDataSource, referenceDataSource.getReference(), dataSources, builder);
return dataSources; return dataSources;
@ -994,7 +961,7 @@ public class GenomeAnalysisEngine {
/** /**
* Get the list of intervals passed to the engine. * Get the list of intervals passed to the engine.
* @return List of intervals. * @return List of intervals, or null if no intervals are in use
*/ */
public GenomeLocSortedSet getIntervals() { public GenomeLocSortedSet getIntervals() {
return this.intervals; return this.intervals;

View File

@ -33,9 +33,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.DescriptionTaglet; import org.broadinstitute.sting.utils.help.ResourceBundleExtractorDoclet;
import org.broadinstitute.sting.utils.help.DisplayNameTaglet;
import org.broadinstitute.sting.utils.help.SummaryTaglet;
import org.broadinstitute.sting.utils.text.TextFormattingUtils; import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.util.*; import java.util.*;
@ -82,19 +80,10 @@ public class WalkerManager extends PluginManager<Walker> {
* @return A suitable display name for the package. * @return A suitable display name for the package.
*/ */
public String getPackageDisplayName(String packageName) { public String getPackageDisplayName(String packageName) {
// Try to find an override for the display name of this package. // Compute the display name from the text of the package name, accounting for
String displayNameKey = String.format("%s.%s",packageName,DisplayNameTaglet.NAME); // unpackaged walkers.
String displayName; String displayName = packageName.substring(packageName.lastIndexOf('.')+1);
if(helpText.containsKey(displayNameKey)) { if (displayName.trim().equals("")) displayName = "<unpackaged>";
displayName = helpText.getString(displayNameKey);
}
else {
// If no override exists...
// ...try to compute the override from the text of the package name, while accounting for
// unpackaged walkers.
displayName = packageName.substring(packageName.lastIndexOf('.')+1);
if(displayName.trim().equals("")) displayName = "<unpackaged>";
}
return displayName; return displayName;
} }
@ -104,7 +93,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Package help text, or "" if none exists. * @return Package help text, or "" if none exists.
*/ */
public String getPackageSummaryText(String packageName) { public String getPackageSummaryText(String packageName) {
String key = String.format("%s.%s",packageName,SummaryTaglet.NAME); String key = String.format("%s.%s",packageName, ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(key)) if(!helpText.containsKey(key))
return ""; return "";
return helpText.getString(key); return helpText.getString(key);
@ -116,7 +105,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Walker summary description, or "" if none exists. * @return Walker summary description, or "" if none exists.
*/ */
public String getWalkerSummaryText(Class<? extends Walker> walkerType) { public String getWalkerSummaryText(Class<? extends Walker> walkerType) {
String walkerSummary = String.format("%s.%s",walkerType.getName(), SummaryTaglet.NAME); String walkerSummary = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.SUMMARY_TAGLET_NAME);
if(!helpText.containsKey(walkerSummary)) if(!helpText.containsKey(walkerSummary))
return ""; return "";
return helpText.getString(walkerSummary); return helpText.getString(walkerSummary);
@ -137,7 +126,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return Walker full description, or "" if none exists. * @return Walker full description, or "" if none exists.
*/ */
public String getWalkerDescriptionText(Class<? extends Walker> walkerType) { public String getWalkerDescriptionText(Class<? extends Walker> walkerType) {
String walkerDescription = String.format("%s.%s",walkerType.getName(), DescriptionTaglet.NAME); String walkerDescription = String.format("%s.%s",walkerType.getName(), ResourceBundleExtractorDoclet.DESCRIPTION_TAGLET_NAME);
if(!helpText.containsKey(walkerDescription)) if(!helpText.containsKey(walkerDescription))
return ""; return "";
return helpText.getString(walkerDescription); return helpText.getString(walkerDescription);
@ -188,19 +177,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return The list of allowed reference meta data. * @return The list of allowed reference meta data.
*/ */
public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) { public static List<RMD> getAllowsMetaData(Class<? extends Walker> walkerClass) {
Allows allowsDataSource = getWalkerAllowed(walkerClass); return Collections.<RMD>emptyList();
if (allowsDataSource == null)
return Collections.<RMD>emptyList();
return Arrays.asList(allowsDataSource.referenceMetaData());
}
/**
* Get a list of RODs allowed by the walker.
* @param walker Walker to query.
* @return The list of allowed reference meta data.
*/
public static List<RMD> getAllowsMetaData(Walker walker) {
return getAllowsMetaData(walker.getClass());
} }
/** /**
@ -237,24 +214,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return True if the walker forbids this data type. False otherwise. * @return True if the walker forbids this data type. False otherwise.
*/ */
public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) { public static boolean isAllowed(Class<? extends Walker> walkerClass, ReferenceOrderedDataSource rod) {
Allows allowsDataSource = getWalkerAllowed(walkerClass); return true;
// Allows is less restrictive than requires. If an allows
// clause is not specified, any kind of data is allowed.
if( allowsDataSource == null )
return true;
// The difference between unspecified RMD and the empty set of metadata can't be detected.
// Treat an empty 'allows' as 'allow everything'. Maybe we can have a special RMD flag to account for this
// case in the future.
if( allowsDataSource.referenceMetaData().length == 0 )
return true;
for( RMD allowed: allowsDataSource.referenceMetaData() ) {
if( rod.matchesNameAndRecordType(allowed.name(),allowed.type()) )
return true;
}
return false;
} }
/** /**
@ -294,8 +254,7 @@ public class WalkerManager extends PluginManager<Walker> {
* @return The list of required reference meta data. * @return The list of required reference meta data.
*/ */
public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) { public static List<RMD> getRequiredMetaData(Class<? extends Walker> walkerClass) {
Requires requiresDataSource = getWalkerRequirements(walkerClass); return Collections.emptyList();
return Arrays.asList(requiresDataSource.referenceMetaData());
} }
/** /**

View File

@ -23,8 +23,26 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.datasources.reference; package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/**
* @author ebanks
* @version 1.0
*/
@Root
public class DbsnpArgumentCollection {
/**
* A dbSNP VCF file.
*/
@Input(fullName="dbsnp", shortName = "D", doc="dbSNP file", required=false)
public RodBinding<VariantContext> dbsnp;
public interface ReferenceDataSourceProgressListener {
public void percentProgress(int percent);
} }

View File

@ -101,6 +101,8 @@ public class GATKArgumentCollection {
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null; public File referenceFile = null;
@Deprecated
@Hidden
@ElementList(required = false) @ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false) @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>(); public ArrayList<String> RODBindings = new ArrayList<String>();
@ -117,11 +119,6 @@ public class GATKArgumentCollection {
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false; public boolean nonDeterministicRandomSeed = false;
@Element(required = false)
@Input(fullName = "DBSNP", shortName = "D", doc = "DBSNP file", required = false)
public String DBSNPFile = null;
/** /**
* The override mechanism in the GATK, by default, populates the command-line arguments, then * The override mechanism in the GATK, by default, populates the command-line arguments, then
* the defaults from the walker annotations. Unfortunately, walker annotations should be trumped * the defaults from the walker annotations. Unfortunately, walker annotations should be trumped
@ -345,14 +342,6 @@ public class GATKArgumentCollection {
return false; return false;
} }
} }
if (other.RODBindings.size() != RODBindings.size()) {
return false;
}
for (int x = 0; x < RODBindings.size(); x++) {
if (!RODBindings.get(x).equals(other.RODBindings.get(x))) {
return false;
}
}
if (!other.samFiles.equals(this.samFiles)) { if (!other.samFiles.equals(this.samFiles)) {
return false; return false;
} }
@ -380,9 +369,6 @@ public class GATKArgumentCollection {
if (!other.excludeIntervals.equals(this.excludeIntervals)) { if (!other.excludeIntervals.equals(this.excludeIntervals)) {
return false; return false;
} }
if (!other.DBSNPFile.equals(this.DBSNPFile)) {
return false;
}
if (!other.unsafe.equals(this.unsafe)) { if (!other.unsafe.equals(this.unsafe)) {
return false; return false;
} }

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/**
* @author ebanks
* @version 1.0
*/
@Root
public class StandardVariantContextInputArgumentCollection {
/**
* Variants from this VCF file are used by this tool as input.
* The file must contain at least the standard VCF header lines, but
* can be empty (i.e., no variants are contained in the file).
*/
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
public RodBinding<VariantContext> variants;
}
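
For orientation, here is a minimal sketch of how a walker could wire up the two argument collections introduced in this patch via @ArgumentCollection. The class name, package placement, and method bodies below are hypothetical; only the declarations matter, and the --dbsnp binding stands in for the String-valued DBSNP argument removed from GATKArgumentCollection above.

    package org.broadinstitute.sting.gatk.examples;

    import org.broadinstitute.sting.commandline.ArgumentCollection;
    import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
    import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
    import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
    import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
    import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
    import org.broadinstitute.sting.gatk.walkers.RodWalker;

    public class ArgumentCollectionUsageSketch extends RodWalker<Integer, Integer> {
        // Exposes --variant/-V as a required RodBinding<VariantContext>.
        @ArgumentCollection
        private StandardVariantContextInputArgumentCollection variantCollection =
                new StandardVariantContextInputArgumentCollection();

        // Exposes --dbsnp/-D as an optional RodBinding<VariantContext>.
        @ArgumentCollection
        private DbsnpArgumentCollection dbsnpCollection = new DbsnpArgumentCollection();

        public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
            // variantCollection.variants and dbsnpCollection.dbsnp are the bindings to hand to the tracker.
            return 0;
        }

        public Integer reduceInit() { return 0; }

        public Integer reduce(Integer value, Integer sum) { return value + sum; }
    }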

View File

@ -1,8 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.ArrayList; import java.util.ArrayList;
@ -49,11 +51,14 @@ public class ManagingReferenceOrderedView implements ReferenceOrderedView {
* @param loc Locus at which to track. * @param loc Locus at which to track.
* @return A tracker containing information about this locus. * @return A tracker containing information about this locus.
*/ */
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
RefMetaDataTracker tracks = new RefMetaDataTracker(states.size()); List<RODRecordList> bindings = states.isEmpty() ? Collections.<RODRecordList>emptyList() : new ArrayList<RODRecordList>(states.size());
for ( ReferenceOrderedDataState state: states ) for ( ReferenceOrderedDataState state: states )
tracks.bind( state.dataSource.getName(), state.iterator.seekForward(loc) ); // todo -- warning, I removed the reference to the name from states
return tracks; bindings.add( state.iterator.seekForward(loc) );
return new RefMetaDataTracker(bindings, referenceContext);
} }
/** /**

View File

@ -1,8 +1,9 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
public interface ReferenceOrderedView extends View { public interface ReferenceOrderedView extends View {
RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ); RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext refContext );
} }

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -45,7 +46,8 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
*/ */
private RODMergingIterator rodQueue = null; private RODMergingIterator rodQueue = null;
RefMetaDataTracker tracker = null; Collection<RODRecordList> allTracksHere;
GenomeLoc lastLoc = null; GenomeLoc lastLoc = null;
RODRecordList interval = null; RODRecordList interval = null;
@ -94,12 +96,12 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
} }
rodQueue = new RODMergingIterator(iterators); rodQueue = new RODMergingIterator(iterators);
//throw new StingException("RodLocusView currently disabled");
} }
public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc ) { public RefMetaDataTracker getReferenceOrderedDataAtLocus( GenomeLoc loc, ReferenceContext referenceContext ) {
return tracker; // special case the interval again -- add it into the ROD
if ( interval != null ) { allTracksHere.add(interval); }
return new RefMetaDataTracker(allTracksHere, referenceContext);
} }
public boolean hasNext() { public boolean hasNext() {
@ -122,10 +124,7 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n"); if ( DEBUG ) System.out.printf("In RodLocusView.next(): creating tracker...%n");
// Update the tracker here for use allTracksHere = getSpanningTracks(datum);
Collection<RODRecordList> allTracksHere = getSpanningTracks(datum);
tracker = createTracker(allTracksHere);
GenomeLoc rodSite = datum.getLocation(); GenomeLoc rodSite = datum.getLocation();
GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart()); GenomeLoc site = genomeLocParser.createGenomeLoc( rodSite.getContig(), rodSite.getStart(), rodSite.getStart());
@ -137,19 +136,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases); return new AlignmentContext(site, new ReadBackedPileupImpl(site), skippedBases);
} }
private RefMetaDataTracker createTracker( Collection<RODRecordList> allTracksHere ) {
RefMetaDataTracker t = new RefMetaDataTracker(allTracksHere.size());
for ( RODRecordList track : allTracksHere ) {
if ( ! t.hasROD(track.getName()) )
t.bind(track.getName(), track);
}
// special case the interval again -- add it into the ROD
if ( interval != null ) { t.bind(interval.getName(), interval); }
return t;
}
private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) { private Collection<RODRecordList> getSpanningTracks(RODRecordList marker) {
return rodQueue.allElementsLTE(marker); return rodQueue.allElementsLTE(marker);
} }
@ -197,10 +183,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
return getSkippedBases(getLocOneBeyondShard()); return getSkippedBases(getLocOneBeyondShard());
} }
public RefMetaDataTracker getTracker() {
return tracker;
}
/** /**
* Closes the current view. * Closes the current view.
*/ */
@ -209,6 +191,6 @@ public class RodLocusView extends LocusView implements ReferenceOrderedView {
state.dataSource.close( state.iterator ); state.dataSource.close( state.iterator );
rodQueue = null; rodQueue = null;
tracker = null; allTracksHere = null;
} }
} }

View File

@ -59,8 +59,8 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/ */
public FilePointer next() { public FilePointer next() {
FilePointer current = wrappedIterator.next(); FilePointer current = wrappedIterator.next();
//while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0) while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
// current = current.combine(parser,wrappedIterator.next()); current = current.combine(parser,wrappedIterator.next());
return current; return current;
} }
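
The loop re-enabled above merges consecutive FilePointers whose regions are contiguous: minus(...) == 0 appears to signal contiguity, after which combine(...) fuses the two. A rough standalone sketch of the same pattern, using plain [start, stop] int intervals in place of FilePointers (all names here are illustrative, not GATK types, and contiguity is assumed to mean the next interval starts exactly one past the current stop):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class AdjacentIntervalMerger {
        public static List<int[]> merge(List<int[]> intervals) {
            List<int[]> merged = new ArrayList<int[]>();
            int i = 0;
            while (i < intervals.size()) {
                int[] current = intervals.get(i++);
                // Analogue of "minus(...) == 0": the next interval begins where this one ends.
                while (i < intervals.size() && intervals.get(i)[0] == current[1] + 1)
                    current = new int[] { current[0], intervals.get(i++)[1] };   // analogue of combine(...)
                merged.add(current);
            }
            return merged;
        }

        public static void main(String[] args) {
            List<int[]> in = Arrays.asList(new int[]{1, 10}, new int[]{11, 20}, new int[]{30, 40});
            for (int[] iv : merge(in))
                System.out.println(iv[0] + "-" + iv[1]);   // prints 1-20 then 30-40
        }
    }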

View File

@ -893,6 +893,7 @@ public class SAMDataSource {
* Custom representation of interval bounds. * Custom representation of interval bounds.
* Makes it simpler to track current position. * Makes it simpler to track current position.
*/ */
private int[] intervalContigIndices;
private int[] intervalStarts; private int[] intervalStarts;
private int[] intervalEnds; private int[] intervalEnds;
@ -917,12 +918,14 @@ public class SAMDataSource {
if(foundMappedIntervals) { if(foundMappedIntervals) {
if(keepOnlyUnmappedReads) if(keepOnlyUnmappedReads)
throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mix of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads"); throw new ReviewedStingException("Tried to apply IntervalOverlapFilteringIterator to a mix of mapped and unmapped intervals. Please apply this filter to only mapped or only unmapped reads");
this.intervalContigIndices = new int[intervals.size()];
this.intervalStarts = new int[intervals.size()]; this.intervalStarts = new int[intervals.size()];
this.intervalEnds = new int[intervals.size()]; this.intervalEnds = new int[intervals.size()];
int i = 0; int i = 0;
for(GenomeLoc interval: intervals) { for(GenomeLoc interval: intervals) {
intervalStarts[i] = (int)interval.getStart(); intervalContigIndices[i] = interval.getContigIndex();
intervalEnds[i] = (int)interval.getStop(); intervalStarts[i] = interval.getStart();
intervalEnds[i] = interval.getStop();
i++; i++;
} }
} }
@ -961,11 +964,10 @@ public class SAMDataSource {
while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) { while(nextRead == null && (keepOnlyUnmappedReads || currentBound < intervalStarts.length)) {
if(!keepOnlyUnmappedReads) { if(!keepOnlyUnmappedReads) {
// Mapped read filter; check against GenomeLoc-derived bounds. // Mapped read filter; check against GenomeLoc-derived bounds.
if(candidateRead.getAlignmentEnd() >= intervalStarts[currentBound] || if(readEndsOnOrAfterStartingBound(candidateRead)) {
(candidateRead.getReadUnmappedFlag() && candidateRead.getAlignmentStart() >= intervalStarts[currentBound])) { // This read ends after the current interval begins.
// This read ends after the current interval begins (or, if unmapped, starts within the bounds of the interval.
// Promising, but this read must be checked against the ending bound. // Promising, but this read must be checked against the ending bound.
if(candidateRead.getAlignmentStart() <= intervalEnds[currentBound]) { if(readStartsOnOrBeforeEndingBound(candidateRead)) {
// Yes, this read is within both bounds. This must be our next read. // Yes, this read is within both bounds. This must be our next read.
nextRead = candidateRead; nextRead = candidateRead;
break; break;
@ -993,6 +995,37 @@ public class SAMDataSource {
candidateRead = iterator.next(); candidateRead = iterator.next();
} }
} }
/**
* Check whether the read ends on or after the start of the current bound. If the read is unmapped but placed,
* its end may be unreliable, so rely only on the alignment start in that case.
* @param read The read to position-check.
* @return True if the read ends on or after the start of the current bound. False otherwise.
*/
private boolean readEndsOnOrAfterStartingBound(final SAMRecord read) {
return
// Read ends on a later contig, or...
read.getReferenceIndex() > intervalContigIndices[currentBound] ||
// Read ends on this contig...
(read.getReferenceIndex() == intervalContigIndices[currentBound] &&
// either after this location, or...
(read.getAlignmentEnd() >= intervalStarts[currentBound] ||
// read is unmapped but positioned and alignment start is on or after this start point.
(read.getReadUnmappedFlag() && read.getAlignmentStart() >= intervalStarts[currentBound])));
}
/**
* Check whether the read starts on or before the end of the current bound.
* @param read The read to position-check.
* @return True if the read starts on or before the end of the current bound. False otherwise.
*/
private boolean readStartsOnOrBeforeEndingBound(final SAMRecord read) {
return
// Read starts on a prior contig, or...
read.getReferenceIndex() < intervalContigIndices[currentBound] ||
// Read starts on this contig and the alignment start is registered before this end point.
(read.getReferenceIndex() == intervalContigIndices[currentBound] && read.getAlignmentStart() <= intervalEnds[currentBound]);
}
} }
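
The two helpers above compare reads against interval bounds by (contig index, coordinate) rather than by coordinate alone. A standalone sketch of the same comparisons using plain ints (the unmapped-but-placed special case is omitted, and all names are illustrative):

    public class IntervalBoundsSketch {
        /** True when the read's end lies on or after the interval start (or the read is on a later contig). */
        static boolean endsOnOrAfterStart(int readContig, int readEnd, int intervalContig, int intervalStart) {
            return readContig > intervalContig ||
                   (readContig == intervalContig && readEnd >= intervalStart);
        }

        /** True when the read's start lies on or before the interval end (or the read is on an earlier contig). */
        static boolean startsOnOrBeforeEnd(int readContig, int readStart, int intervalContig, int intervalEnd) {
            return readContig < intervalContig ||
                   (readContig == intervalContig && readStart <= intervalEnd);
        }

        public static void main(String[] args) {
            // A read on contig 2 at [100, 150] overlaps an interval on contig 2 at [120, 200] ...
            System.out.println(endsOnOrAfterStart(2, 150, 2, 120) && startsOnOrBeforeEnd(2, 100, 2, 200)); // true
            // ... but not an interval on contig 3 with the same coordinates.
            System.out.println(endsOnOrAfterStart(2, 150, 3, 120) && startsOnOrBeforeEnd(2, 100, 3, 200)); // false
        }
    }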
/** /**

View File

@ -41,7 +41,7 @@ import java.io.File;
* Loads reference data from fasta file * Loads reference data from fasta file
* Looks for fai and dict files, and tries to create them if they don't exist * Looks for fai and dict files, and tries to create them if they don't exist
*/ */
public class ReferenceDataSource implements ReferenceDataSourceProgressListener { public class ReferenceDataSource {
private IndexedFastaSequenceFile index; private IndexedFastaSequenceFile index;
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
@ -75,7 +75,7 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
// get exclusive lock // get exclusive lock
if (!indexLock.exclusiveLock()) if (!indexLock.exclusiveLock())
throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile); throw new UserException.CouldNotCreateReferenceIndexFileBecauseOfLock(dictFile);
FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, this); FastaSequenceIndexBuilder faiBuilder = new FastaSequenceIndexBuilder(fastaFile, true);
FastaSequenceIndex sequenceIndex = faiBuilder.createIndex(); FastaSequenceIndex sequenceIndex = faiBuilder.createIndex();
FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile); FastaSequenceIndexBuilder.saveAsFaiFile(sequenceIndex, indexFile);
} }
@ -194,13 +194,4 @@ public class ReferenceDataSource implements ReferenceDataSourceProgressListener
public IndexedFastaSequenceFile getReference() { public IndexedFastaSequenceFile getReference() {
return this.index; return this.index;
} }
/**
* Notify user of progress in creating fai file
* @param percent Percent of fasta file read as a percent
*/
public void percentProgress(int percent) {
System.out.println(String.format("PROGRESS UPDATE: file is %d percent complete", percent));
}
} }

View File

@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.datasources.rmd;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator; import org.broadinstitute.sting.gatk.refdata.utils.FlashBackIterator;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;

View File

@ -29,7 +29,7 @@ import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator; import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -110,11 +110,11 @@ public class ReferenceOrderedDataSource {
} }
public Class getType() { public Class getType() {
return builder.getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); return builder.getFeatureManager().getByTriplet(fileDescriptor).getCodecClass();
} }
public Class getRecordType() { public Class getRecordType() {
return builder.createCodec(getType(),getName()).getFeatureType(); return builder.getFeatureManager().getByTriplet(fileDescriptor).getFeatureClass();
} }
public File getFile() { public File getFile() {

View File

@ -0,0 +1,82 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
/**
* [Short one sentence description of this walker]
*
* <p>
* [Functionality of this walker]
* </p>
*
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
*
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author Your Name
* @since Date created
*/
public class GATKDocsExample extends RodWalker<Integer, Integer> {
/**
* Put detailed documentation about the argument here. No need to duplicate the summary information
* in doc annotation field, as that will be added before this text in the documentation page.
*
* Notes:
* <ul>
* <li>This field can contain HTML as a normal javadoc</li>
* <li>Don't include information about the default value, as gatkdocs adds this automatically</li>
* <li>Try your best to describe in detail the behavior of the argument, as ultimately confusing
* docs here will just result in user posts on the forum</li>
* </ul>
*/
@Argument(fullName="full", shortName="short", doc="Brief summary of argument [~ 80 characters of text]", required=false)
private boolean myWalkerArgument = false;
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { return 0; }
public Integer reduceInit() { return 0; }
public Integer reduce(Integer value, Integer sum) { return value + sum; }
public void onTraversalDone(Integer result) { }
}

View File

@ -97,7 +97,6 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
if (!( walker instanceof TreeReducible )) if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers"); throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");
traversalEngine.startTimers();
ReduceTree reduceTree = new ReduceTree(this); ReduceTree reduceTree = new ReduceTree(this);
initializeWalker(walker); initializeWalker(walker);

View File

@ -44,15 +44,16 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param shardStrategy A strategy for sharding the data. * @param shardStrategy A strategy for sharding the data.
*/ */
public Object execute(Walker walker, ShardStrategy shardStrategy) { public Object execute(Walker walker, ShardStrategy shardStrategy) {
traversalEngine.startTimers();
walker.initialize(); walker.initialize();
Accumulator accumulator = Accumulator.create(engine,walker); Accumulator accumulator = Accumulator.create(engine,walker);
boolean done = walker.isDone();
int counter = 0; int counter = 0;
for (Shard shard : processingTracker.onlyOwned(shardStrategy, engine.getName())) { for (Shard shard : shardStrategy ) {
if ( shard == null ) // we ran out of shards that aren't owned if ( done || shard == null ) // we ran out of shards that aren't owned
break; break;
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) { if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
@ -61,6 +62,7 @@ public class LinearMicroScheduler extends MicroScheduler {
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
accumulator.accumulate(dataProvider,result); accumulator.accumulate(dataProvider,result);
dataProvider.close(); dataProvider.close();
if ( walker.isDone() ) break;
} }
windowMaker.close(); windowMaker.close();
} }
@ -70,6 +72,8 @@ public class LinearMicroScheduler extends MicroScheduler {
accumulator.accumulate(dataProvider,result); accumulator.accumulate(dataProvider,result);
dataProvider.close(); dataProvider.close();
} }
done = walker.isDone();
} }
Object result = accumulator.finishTraversal(); Object result = accumulator.finishTraversal();
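
The walker.isDone() bookkeeping added above is an early-termination loop: check once before handing out shards, then re-check after each one. A standalone sketch of the pattern with simplified stand-in types (not the real scheduler or walker interfaces):

    import java.util.Arrays;
    import java.util.List;

    public class EarlyTerminationSketch {
        interface Walker { void processShard(String shard); boolean isDone(); }

        static int traverse(Walker walker, List<String> shards) {
            int processed = 0;
            boolean done = walker.isDone();        // honour walkers that are done before any data
            for (String shard : shards) {
                if (done || shard == null) break;  // stop handing out shards once the walker is satisfied
                walker.processShard(shard);
                processed++;
                done = walker.isDone();            // re-check after every shard
            }
            return processed;
        }

        public static void main(String[] args) {
            // A toy walker that declares itself done after seeing two shards.
            Walker walker = new Walker() {
                private int seen = 0;
                public void processShard(String shard) { seen++; }
                public boolean isDone() { return seen >= 2; }
            };
            System.out.println(traverse(walker, Arrays.asList("A", "B", "C", "D"))); // prints 2
        }
    }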

View File

@ -39,14 +39,10 @@ import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.threading.*;
import javax.management.JMException; import javax.management.JMException;
import javax.management.MBeanServer; import javax.management.MBeanServer;
import javax.management.ObjectName; import javax.management.ObjectName;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.PrintStream;
import java.lang.management.ManagementFactory; import java.lang.management.ManagementFactory;
import java.util.Collection; import java.util.Collection;
@ -83,8 +79,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
private final MBeanServer mBeanServer; private final MBeanServer mBeanServer;
private final ObjectName mBeanName; private final ObjectName mBeanName;
protected GenomeLocProcessingTracker processingTracker;
/** /**
* MicroScheduler factory function. Create a microscheduler appropriate for reducing the * MicroScheduler factory function. Create a microscheduler appropriate for reducing the
* selected walker. * selected walker.
@ -98,11 +92,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @return The best-fit microscheduler. * @return The best-fit microscheduler.
*/ */
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) { public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
if (engine.getArguments().processingTrackerFile != null) {
if ( walker instanceof ReadWalker )
throw new UserException.BadArgumentValue("C", String.format("Distributed GATK processing not enabled for read walkers"));
}
if (walker instanceof TreeReducible && nThreadsToUse > 1) { if (walker instanceof TreeReducible && nThreadsToUse > 1) {
if(walker.isReduceByInterval()) if(walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass()))); throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
@ -157,33 +146,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
catch (JMException ex) { catch (JMException ex) {
throw new ReviewedStingException("Unable to register microscheduler with JMX", ex); throw new ReviewedStingException("Unable to register microscheduler with JMX", ex);
} }
//
// create the processing tracker
//
if ( engine.getArguments().processingTrackerFile != null ) {
logger.warn("Distributed GATK is an experimental engine feature, and is likely to not work correctly or reliably.");
if ( engine.getArguments().restartProcessingTracker && engine.getArguments().processingTrackerFile.exists() ) {
engine.getArguments().processingTrackerFile.delete();
logger.info("Deleting ProcessingTracker file " + engine.getArguments().processingTrackerFile);
}
PrintStream statusStream = null;
if ( engine.getArguments().processingTrackerStatusFile != null ) {
try {
statusStream = new PrintStream(new FileOutputStream(engine.getArguments().processingTrackerStatusFile));
} catch ( FileNotFoundException e) {
throw new UserException.CouldNotCreateOutputFile(engine.getArguments().processingTrackerStatusFile, e);
}
}
ClosableReentrantLock lock = new SharedFileThreadSafeLock(engine.getArguments().processingTrackerFile, engine.getArguments().processTrackerID);
processingTracker = new FileBackedGenomeLocProcessingTracker(engine.getArguments().processingTrackerFile, engine.getGenomeLocParser(), lock, statusStream) ;
logger.info("Creating ProcessingTracker using shared file " + engine.getArguments().processingTrackerFile + " process.id = " + engine.getName() + " CID = " + engine.getArguments().processTrackerID);
} else {
// create a NoOp version that doesn't do anything but say "yes"
processingTracker = new NoOpGenomeLocProcessingTracker();
}
} }
/** /**

View File

@ -57,6 +57,7 @@ public class ShardTraverser implements Callable {
public Object call() { public Object call() {
try { try {
traversalEngine.startTimersIfNecessary();
long startTime = System.currentTimeMillis(); long startTime = System.currentTimeMillis();
Object accumulator = walker.reduceInit(); Object accumulator = walker.reduceInit();

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out FailsVendorQualityCheck reads. * Filter out FailsVendorQualityCheck reads.
*/ */
public class FailsVendorQualityCheckReadFilter extends ReadFilter { public class FailsVendorQualityCheckFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) { public boolean filterOut( final SAMRecord read ) {
return read.getReadFailsVendorQualityCheckFlag(); return read.getReadFailsVendorQualityCheckFlag();
} }

View File

@ -35,7 +35,7 @@ import org.broadinstitute.sting.commandline.Argument;
* @version 0.1 * @version 0.1
*/ */
public class MappingQualityReadFilter extends ReadFilter { public class MappingQualityFilter extends ReadFilter {
@Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false) @Argument(fullName = "min_mapping_quality_score", shortName = "mmq", doc = "Minimum read mapping quality required to consider a read for calling", required = false)
public int MIN_MAPPING_QUALTY_SCORE = 10; public int MIN_MAPPING_QUALTY_SCORE = 10;

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.QualityUtils;
* @version 0.1 * @version 0.1
*/ */
public class MappingQualityUnavailableReadFilter extends ReadFilter { public class MappingQualityUnavailableFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE); return (rec.getMappingQuality() == QualityUtils.MAPPING_QUALITY_UNAVAILABLE);
} }

View File

@ -33,7 +33,7 @@ import net.sf.samtools.SAMRecord;
* @version 0.1 * @version 0.1
*/ */
public class MappingQualityZeroReadFilter extends ReadFilter { public class MappingQualityZeroFilter extends ReadFilter {
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {
return (rec.getMappingQuality() == 0); return (rec.getMappingQuality() == 0);
} }

View File

@ -34,7 +34,7 @@ import net.sf.samtools.SAMRecord;
* Filter out non-primary alignment reads. * Filter out non-primary alignment reads.
*/ */
public class NotPrimaryAlignmentReadFilter extends ReadFilter { public class NotPrimaryAlignmentFilter extends ReadFilter {
public boolean filterOut( final SAMRecord read ) { public boolean filterOut( final SAMRecord read ) {
return read.getNotPrimaryAlignmentFlag(); return read.getNotPrimaryAlignmentFlag();
} }

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1 * @version 0.1
*/ */
public class PlatformFilter extends ReadFilter { public class PlatformFilter extends ReadFilter {
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false) @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames; protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) { public boolean filterOut(SAMRecord rec) {

View File

@ -2,10 +2,14 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
/** /**
* A SamRecordFilter that also depends on the header. * A SamRecordFilter that also depends on the header.
*/ */
@DocumentedGATKFeature(
groupName = "Read filters",
summary = "GATK Engine arguments that filter or transfer incoming SAM/BAM data files" )
public abstract class ReadFilter implements SamRecordFilter { public abstract class ReadFilter implements SamRecordFilter {
/** /**
* Sets the header for use by this filter. * Sets the header for use by this filter.

View File

@ -0,0 +1,72 @@
/*
* Copyright (c) 2009 The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* A read filter (transformer) that sets the mapping quality of all reads to a given value.
*
* <p>
* If a BAM file contains erroneous or missing mapping qualities, this 'filter' sets the
* mapping quality of every read to a given value (60 by default).
* </p>
*
*
* <h2>Input</h2>
* <p>
* BAM file(s)
* </p>
*
*
* <h2>Output</h2>
* <p>
* BAM file(s) with the mapping quality of every read reassigned
* </p>
*
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -rf ReassignMappingQuality
* -DMQ 35
* </pre>
*
* @author carneiro
* @since 8/8/11
*/
public class ReassignMappingQualityFilter extends ReadFilter {
@Argument(fullName = "default_mapping_quality", shortName = "DMQ", doc = "Default read mapping quality to assign to all reads", required = false)
public int defaultMappingQuality = 60;
public boolean filterOut(SAMRecord rec) {
rec.setMappingQuality(defaultMappingQuality);
return false;
}
}
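
A small, hypothetical snippet illustrating the 'transformer' behaviour: filterOut() rewrites the mapping quality in place and never rejects the read. In practice the engine drives the filter; the bare SAMRecord construction and the demo class below are only for illustration.

    import net.sf.samtools.SAMFileHeader;
    import net.sf.samtools.SAMRecord;
    import org.broadinstitute.sting.gatk.filters.ReassignMappingQualityFilter;

    public class ReassignMappingQualityFilterDemo {
        public static void main(String[] args) {
            SAMRecord read = new SAMRecord(new SAMFileHeader());
            read.setMappingQuality(0);

            ReassignMappingQualityFilter filter = new ReassignMappingQualityFilter();
            boolean dropped = filter.filterOut(read);      // rewrites MAPQ, never discards

            System.out.println(dropped);                   // false
            System.out.println(read.getMappingQuality());  // 60, the default for -DMQ
        }
    }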

View File

@ -87,8 +87,8 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
writer.writeHeader(stub.getVCFHeader()); writer.writeHeader(stub.getVCFHeader());
} }
public void add(VariantContext vc, byte ref) { public void add(VariantContext vc) {
writer.add(vc, ref); writer.add(vc);
} }
/** /**
@ -117,7 +117,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false); BasicFeatureSource<VariantContext> source = BasicFeatureSource.getFeatureSource(file.getAbsolutePath(), new VCFCodec(), false);
for ( VariantContext vc : source.iterator() ) { for ( VariantContext vc : source.iterator() ) {
target.writer.add(vc, vc.getReferenceBaseForIndel()); target.writer.add(vc);
} }
source.close(); source.close();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.reflect.Constructor; import java.lang.reflect.Constructor;
import java.lang.reflect.Type;
/** /**
* Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations. * Insert an OutputStreamStub instead of a full-fledged concrete OutputStream implementations.
@ -69,16 +70,21 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class type) { public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
OutputStreamStub stub = new OutputStreamStub(defaultOutputStream); OutputStreamStub stub = new OutputStreamStub(defaultOutputStream);
engine.addOutput(stub); engine.addOutput(stub);
return createInstanceOfClass(type,stub); return createInstanceOfClass((Class)type,stub);
} }
@Override @Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition definition = createDefaultArgumentDefinition(source); ArgumentDefinition definition = createDefaultArgumentDefinition(source);
String fileName = getArgumentValue( definition, matches ); String fileName = getArgumentValue( definition, matches );
@ -91,7 +97,7 @@ public class OutputStreamArgumentTypeDescriptor extends ArgumentTypeDescriptor {
engine.addOutput(stub); engine.addOutput(stub);
Object result = createInstanceOfClass(type,stub); Object result = createInstanceOfClass(makeRawTypeIfNecessary(type),stub);
// WARNING: Side effects required by engine! // WARNING: Side effects required by engine!
parsingEngine.addTags(result,getArgumentTags(matches)); parsingEngine.addTags(result,getArgumentTags(matches));

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder; import org.broadinstitute.sting.utils.sam.SAMFileReaderBuilder;
import java.io.File; import java.io.File;
import java.lang.reflect.Type;
/** /**
* Describe how to parse SAMFileReaders. * Describe how to parse SAMFileReaders.
@ -52,14 +53,13 @@ public class SAMFileReaderArgumentTypeDescriptor extends ArgumentTypeDescriptor
this.engine = engine; this.engine = engine;
} }
@Override @Override
public boolean supports( Class type ) { public boolean supports( Class type ) {
return SAMFileReader.class.isAssignableFrom(type); return SAMFileReader.class.isAssignableFrom(type);
} }
@Override @Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
SAMFileReaderBuilder builder = new SAMFileReaderBuilder(); SAMFileReaderBuilder builder = new SAMFileReaderBuilder();
String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches ); String readerFileName = getArgumentValue( createDefaultArgumentDefinition(source), matches );

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.annotation.Annotation; import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
@ -93,7 +94,12 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
} }
@Override @Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream); SAMFileWriterStub stub = new SAMFileWriterStub(engine,defaultOutputStream);
@ -102,7 +108,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
} }
@Override @Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
// Extract all possible parameters that could be passed to a BAM file writer? // Extract all possible parameters that could be passed to a BAM file writer?
ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source); ArgumentDefinition bamArgumentDefinition = createBAMArgumentDefinition(source);
String writerFileName = getArgumentValue( bamArgumentDefinition, matches ); String writerFileName = getArgumentValue( bamArgumentDefinition, matches );

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File; import java.io.File;
import java.io.OutputStream; import java.io.OutputStream;
import java.lang.reflect.Type;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet; import java.util.HashSet;
@ -108,7 +109,12 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
@Override @Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source,Class<?> type) { public String typeDefaultDocString(ArgumentSource source) {
return "stdout";
}
@Override
public Object createTypeDefault(ParsingEngine parsingEngine,ArgumentSource source, Type type) {
if(!source.isRequired()) if(!source.isRequired())
throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default."); throw new ReviewedStingException("BUG: tried to create type default for argument type descriptor that can't support a type default.");
VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false); VCFWriterStub stub = new VCFWriterStub(engine, defaultOutputStream, false, argumentSources, false, false);
@ -124,7 +130,7 @@ public class VCFWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor {
* @return Transform from the matches into the associated argument. * @return Transform from the matches into the associated argument.
*/ */
@Override @Override
public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Class type, ArgumentMatches matches ) { public Object parse( ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches ) {
ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source); ArgumentDefinition defaultArgumentDefinition = createDefaultArgumentDefinition(source);
// Get the filename for the genotype file, if it exists. If not, we'll need to send output to out. // Get the filename for the genotype file, if it exists. If not, we'll need to send output to out.
String writerFileName = getArgumentValue(defaultArgumentDefinition,matches); String writerFileName = getArgumentValue(defaultArgumentDefinition,matches);
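Illustrative sketch (assumed, not part of this commit): both descriptors now report "stdout" via typeDefaultDocString(), i.e. output falls back to standard output when no file argument is supplied. A standalone sketch of that convention; the argument handling below is illustrative only.

import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.PrintStream;

public class DefaultToStdoutDemo {
    public static void main(String[] args) throws Exception {
        // If a file name was supplied, write there; otherwise fall back to stdout.
        String writerFileName = args.length > 0 ? args[0] : null;
        OutputStream out = (writerFileName != null) ? new FileOutputStream(writerFileName) : System.out;
        new PrintStream(out).println("##fileformat=VCFv4.1"); // header goes to the chosen sink
    }
}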

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs; package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -177,14 +178,23 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
vcfHeader = header; vcfHeader = header;
// Check for the command-line argument header line. If not present, add it in. // Check for the command-line argument header line. If not present, add it in.
VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine(); if ( !skipWritingHeader ) {
boolean foundCommandLineHeaderLine = false; VCFHeaderLine commandLineArgHeaderLine = getCommandLineArgumentHeaderLine();
for(VCFHeaderLine line: vcfHeader.getMetaData()) { boolean foundCommandLineHeaderLine = false;
if(line.getKey().equals(commandLineArgHeaderLine.getKey())) for (VCFHeaderLine line: vcfHeader.getMetaData()) {
foundCommandLineHeaderLine = true; if ( line.getKey().equals(commandLineArgHeaderLine.getKey()) )
foundCommandLineHeaderLine = true;
}
if ( !foundCommandLineHeaderLine )
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
// also put in the reference contig header lines
String assembly = getReferenceAssembly(engine.getArguments().referenceFile.getName());
for ( SAMSequenceRecord contig : engine.getReferenceDataSource().getReference().getSequenceDictionary().getSequences() )
vcfHeader.addMetaDataLine(getContigHeaderLine(contig, assembly));
vcfHeader.addMetaDataLine(new VCFHeaderLine("reference", "file://" + engine.getArguments().referenceFile.getAbsolutePath()));
} }
if(!foundCommandLineHeaderLine && !skipWritingHeader)
vcfHeader.addMetaDataLine(commandLineArgHeaderLine);
outputTracker.getStorage(this).writeHeader(vcfHeader); outputTracker.getStorage(this).writeHeader(vcfHeader);
} }
@ -192,8 +202,8 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
/** /**
* {@inheritDoc} * {@inheritDoc}
*/ */
public void add(VariantContext vc, byte ref) { public void add(VariantContext vc) {
outputTracker.getStorage(this).add(vc,ref); outputTracker.getStorage(this).add(vc);
} }
/** /**
@ -220,4 +230,27 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class); CommandLineExecutable executable = JVMUtils.getObjectOfType(argumentSources,CommandLineExecutable.class);
return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\""); return new VCFHeaderLine(executable.getAnalysisName(), "\"" + engine.createApproximateCommandLineArgumentString(argumentSources.toArray()) + "\"");
} }
private VCFHeaderLine getContigHeaderLine(SAMSequenceRecord contig, String assembly) {
String val;
if ( assembly != null )
val = String.format("<ID=%s,length=%d,assembly=%s>", contig.getSequenceName(), contig.getSequenceLength(), assembly);
else
val = String.format("<ID=%s,length=%d>", contig.getSequenceName(), contig.getSequenceLength());
return new VCFHeaderLine("contig", val);
}
private String getReferenceAssembly(String refPath) {
// This doesn't need to be perfect as it's not a required VCF header line, but we might as well give it a shot
String assembly = null;
if ( refPath.indexOf("b37") != -1 || refPath.indexOf("v37") != -1 )
assembly = "b37";
else if ( refPath.indexOf("b36") != -1 )
assembly = "b36";
else if ( refPath.indexOf("hg18") != -1 )
assembly = "hg18";
else if ( refPath.indexOf("hg19") != -1 )
assembly = "hg19";
return assembly;
}
} }
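Standalone sketch of the contig/assembly header logic added above; the helper names and the sample contig are simplified stand-ins, but the formatting and the file-name heuristic mirror the diff.

public class ContigHeaderDemo {
    // Mirrors getContigHeaderLine(): a VCF contig header value with an optional assembly field.
    static String contigHeaderValue(String name, int length, String assembly) {
        return assembly != null
                ? String.format("<ID=%s,length=%d,assembly=%s>", name, length, assembly)
                : String.format("<ID=%s,length=%d>", name, length);
    }

    // Mirrors getReferenceAssembly(): best-effort guess from the reference file name.
    static String guessAssembly(String refFileName) {
        if (refFileName.contains("b37") || refFileName.contains("v37")) return "b37";
        if (refFileName.contains("b36")) return "b36";
        if (refFileName.contains("hg18")) return "hg18";
        if (refFileName.contains("hg19")) return "hg19";
        return null;
    }

    public static void main(String[] args) {
        String assembly = guessAssembly("human_g1k_v37.fasta");
        System.out.println("##contig=" + contigHeaderValue("20", 63025520, assembly));
        // prints: ##contig=<ID=20,length=63025520,assembly=b37>
    }
}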

View File

@ -46,7 +46,6 @@ import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle; import org.simpleframework.xml.stream.HyphenStyle;
import java.io.*; import java.io.*;
import java.net.InetAddress;
import java.security.NoSuchAlgorithmException; import java.security.NoSuchAlgorithmException;
import java.text.DateFormat; import java.text.DateFormat;
import java.text.SimpleDateFormat; import java.text.SimpleDateFormat;
@ -154,9 +153,13 @@ public class GATKRunReport {
private long nReads; private long nReads;
public enum PhoneHomeOption { public enum PhoneHomeOption {
/** Disable phone home */
NO_ET, NO_ET,
/** Standard option. Writes to local repository if it can be found, or S3 otherwise */
STANDARD, STANDARD,
/** Force output to STDOUT. For debugging only */
STDOUT, STDOUT,
/** Force output to S3. For debugging only */
AWS_S3 // todo -- remove me -- really just for testing purposes AWS_S3 // todo -- remove me -- really just for testing purposes
} }
@ -226,22 +229,6 @@ public class GATKRunReport {
} }
/**
* Helper utility that calls into the InetAddress system to resolve the hostname. If this fails,
* unresolvable gets returned instead.
*
* @return
*/
private String resolveHostname() {
try {
return InetAddress.getLocalHost().getCanonicalHostName();
}
catch (java.net.UnknownHostException uhe) {
return "unresolvable";
// handle exception
}
}
public void postReport(PhoneHomeOption type) { public void postReport(PhoneHomeOption type) {
logger.debug("Posting report of type " + type); logger.debug("Posting report of type " + type);
switch (type) { switch (type) {
@ -321,7 +308,7 @@ public class GATKRunReport {
private void postReportToAWSS3() { private void postReportToAWSS3() {
// modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html // modifying example code from http://jets3t.s3.amazonaws.com/toolkit/code-samples.html
this.hostName = resolveHostname(); // we want to fill in the host name this.hostName = Utils.resolveHostname(); // we want to fill in the host name
File localFile = postReportToLocalDisk(new File("./")); File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file if ( localFile != null ) { // we succeeded in creating the local file
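The local resolveHostname() helper is removed in favour of Utils.resolveHostname(), whose implementation is not shown in this diff; a minimal standalone equivalent, assuming it keeps the same InetAddress-based behaviour as the removed method, would be:

import java.net.InetAddress;
import java.net.UnknownHostException;

public class HostnameDemo {
    static String resolveHostname() {
        try {
            return InetAddress.getLocalHost().getCanonicalHostName();
        } catch (UnknownHostException e) {
            return "unresolvable"; // same fallback as the removed helper
        }
    }

    public static void main(String[] args) {
        System.out.println("host: " + resolveHostname());
    }
}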

View File

@ -1,238 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.FileNotFoundException;
import java.lang.reflect.Constructor;
import java.util.Iterator;
import java.util.regex.Pattern;
/**
* This is a low-level iterator designed to provide system-wide generic support for reading record-oriented data
* files. The only assumption made is that every line in the file provides a complete and separate data record. The records
* can be associated with coordinates or coordinate intervals, there can be one or more records associated with a given
* position/interval, or intervals can overlap. The records must be comprised of delimited fields, but the format is
* otherwise free. For any specific line-based data format, an appropriate implementation of ReferenceOrderedDatum must be
* provided that is capable of parsing itself from a single line of data. This implementation will be used,
* through reflection mechanism, as a callback to do all the work.
*
* The model is, hence, as follows:
*
* String dataRecord <---> RodImplementation ( ::parseLine(dataRecord.split(delimiter)) is aware of the format and fills
* an instance of RodImplementation with data values from dataRecord line).
*
*
* instantiation of RODRecordIterator(dataFile, trackName, RodImplementation.class) will immediately provide an iterator
* that walks along the dataFile line by line, and on each call to next() returns a new RodImplementation object
* representing a single line (record) of data. The returned object will be initialized with "track name" trackName -
* track names (as returned by ROD.getName()) are often used in other parts of the code to distinguish between
* multiple streams of (possibly heterogeneous) annotation data bound to an application.
*
* This generic iterator skips and ignores a) empty lines, b) lines starting with '#' (comments): they are never sent back
* to the ROD implementation class for processing.
*
* This iterator does not actually check if the ROD records (lines) in the file are indeed ordered by coordinate,
* and it does not depend on such an order as it still implements a low-level line-based traversal of the data. Higher-level
* iterators/wrappers will perform all the necessary checks.
*
* Note: some data formats/ROD implementations may require a header line in the file. In this case the current (ugly)
* mechanism is as follows:
* 1) rod implementation's ::initialize(file) method should be able to open the file, find and read the header line
* and return the header object (to be kept by the iterator)
* 2) rod implementation's ::parseLine(header,line) method should be capable of making use of that saved header object now served to it
* and
* 3) ::parseLine(header,line) should be able to recognize the original header line in the file and skip it (after ROD's initialize()
* method is called, the iterator will re-open the file and start reading it from the very beginning; there is no
* other way, except for "smart" ::parseLine(), to avoid reading in the header line as "data").
*
* Created by IntelliJ IDEA.
* User: asivache
* Date: Sep 10, 2009
* Time: 1:22:23 PM
* To change this template use File | Settings | File Templates.
*/
public class RODRecordIterator<ROD extends ReferenceOrderedDatum> implements Iterator<ROD> {
private PushbackIterator<String> reader;
// stores name of the track this iterator reads (will be also returned by getName() of ROD objects
// generated by this iterator)
private String name;
// we keep the file object, only to use file name in error reports
private File file;
// rod type; this is what we will instantiate for RODs at runtime
private Class<ROD> type;
private Object header = null; // Some RODs may use header
// field delimiter in the file. Should it be the job of the iterator to split the lines though? RODs can do that!
private String fieldDelimiter;
// constructor for the ROD objects we are going to return. Constructor that takes the track name as its single arg is required.
private Constructor<ROD> named_constructor;
// keep track of the lines we are reading. used for error messages only.
private long linenum = 0;
private boolean allow_empty = true;
private boolean allow_comments = true;
public static Pattern EMPTYLINE_PATTERN = Pattern.compile("^\\s*$");
public RODRecordIterator(File file, String name, Class<ROD> type) {
try {
reader = new PushbackIterator<String>(new XReadLines(file));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(file, e);
}
this.file = file;
this.name = name;
this.type = type;
try {
named_constructor = type.getConstructor(String.class);
}
catch (java.lang.NoSuchMethodException e) {
throw new ReviewedStingException("ROD class "+type.getName()+" does not have constructor that accepts a single String argument (track name)");
}
ROD rod = instantiateROD(name);
fieldDelimiter = rod.delimiterRegex(); // get delimiter from the ROD itself
try {
header = rod.initialize(file);
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(file, "ROD "+type.getName() + " failed to initialize properly from file "+file);
}
}
/**
* Returns <tt>true</tt> if the iteration has more elements. (In other
* words, returns <tt>true</tt> if <tt>next</tt> would return an element
* rather than throwing an exception.)
*
* @return <tt>true</tt> if the iterator has more elements.
*/
public boolean hasNext() {
if ( allow_empty || allow_comments ) {
while ( reader.hasNext() ) {
String line = reader.next();
if ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ) continue; // skip empty line
if ( allow_comments && line.charAt(0) == '#' ) continue; // skip comment lines
// the line is not empty and not a comment line, so we have next after all
reader.pushback(line);
return true;
}
return false; // oops, we end up here if there's nothing left
} else {
return reader.hasNext();
}
}
/**
* Returns the next valid ROD record in the file, skipping empty and comment lines.
*
* @return the next element in the iteration.
* @throws java.util.NoSuchElementException
* iteration has no more elements.
*/
public ROD next() {
ROD n = null;
boolean parsed_ok = false;
String line ;
while ( ! parsed_ok && reader.hasNext() ) {
line = reader.next();
linenum++;
while ( allow_empty && EMPTYLINE_PATTERN.matcher(line).matches() ||
allow_comments && line.charAt(0) == '#' ) {
if ( reader.hasNext() ) {
line = reader.next();
linenum++;
} else {
line = null;
break;
}
}
if ( line == null ) break; // if we ran out of lines while skipping empty lines/comments, then we are done
String parts[] = line.split(fieldDelimiter);
try {
n = instantiateROD(name);
parsed_ok = n.parseLine(header,parts) ;
}
catch ( Exception e ) {
throw new UserException.MalformedFile(file, "Failed to parse ROD data ("+type.getName()+") from file "+ file + " at line #"+linenum+
"\nOffending line: "+line+
"\nReason ("+e.getClass().getName()+")", e);
}
}
return n;
}
/**
* Removes from the underlying collection the last element returned by the
* iterator (optional operation). This method can be called only once per
* call to <tt>next</tt>. The behavior of an iterator is unspecified if
* the underlying collection is modified while the iteration is in
* progress in any way other than by calling this method.
*
* @throws UnsupportedOperationException if the <tt>remove</tt>
* operation is not supported by this Iterator.
* @throws IllegalStateException if the <tt>next</tt> method has not
* yet been called, or the <tt>remove</tt> method has already
* been called after the last call to the <tt>next</tt>
* method.
*/
public void remove() {
throw new UnsupportedOperationException("remove() operation is not supported by RODRecordIterator");
}
/** Instantiates appropriate implementation of the ROD used by this iterator. The 'name' argument is the name
* of the ROD track.
* @param name
* @return
*/
private ROD instantiateROD(final String name) {
try {
return (ROD) named_constructor.newInstance(name);
} catch (Exception e) {
throw new DynamicClassResolutionException(named_constructor.getDeclaringClass(), e);
}
}
}
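Standalone sketch (not GATK code) of the per-line behaviour the deleted iterator implemented: skip blank and '#' comment lines and split the rest into fields; the tab delimiter below is an assumption, as the real delimiter came from the ROD implementation.

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;

public class LineRecordDemo {
    public static void main(String[] args) throws IOException {
        try (BufferedReader in = new BufferedReader(new FileReader(args[0]))) {
            String line;
            long lineNum = 0;
            while ((line = in.readLine()) != null) {
                lineNum++;
                if (line.trim().isEmpty() || line.startsWith("#"))
                    continue; // skip empty and comment lines, as the deleted iterator did
                String[] fields = line.split("\t"); // assumed delimiter
                System.out.println("record at line " + lineNum + " with " + fields.length + " fields");
            }
        }
    }
}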

View File

@ -1,13 +1,15 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList; import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
@ -18,348 +20,402 @@ import java.util.*;
* The standard interaction model is: * The standard interaction model is:
* *
* Traversal system arrives at a site, which has a bunch of RMDs covering it * Traversal system arrives at a site, which has a bunch of RMDs covering it
* Traversal calls tracker.bind(name, RMD) for each RMDs in RMDs * Traversal creates a tracker and passes it to the walker
* Traversal passes tracker to the walker * walker calls get(rodBinding) to obtain the RMDs values at this site for the track
* walker calls lookup(name, default) to obtain the RMDs values at this site, or default if none was * associated with rodBinding.
* bound at this site. *
* Note that this is an immutable class. Once created the underlying data structures
* cannot be modified
* *
* User: mdepristo * User: mdepristo
* Date: Apr 3, 2009 * Date: Apr 3, 2009
* Time: 3:05:23 PM * Time: 3:05:23 PM
*/ */
public class RefMetaDataTracker { public class RefMetaDataTracker {
// TODO: this should be a list, not a map, actually
private final static RODRecordList EMPTY_ROD_RECORD_LIST = new RODRecordListImpl("EMPTY");
final Map<String, RODRecordList> map; final Map<String, RODRecordList> map;
protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class); final ReferenceContext ref;
final protected static Logger logger = Logger.getLogger(RefMetaDataTracker.class);
public RefMetaDataTracker(int nBindings) { // ------------------------------------------------------------------------------------------
if ( nBindings == 0 ) //
//
// Special ENGINE interaction functions
//
//
// ------------------------------------------------------------------------------------------
public RefMetaDataTracker(final Collection<RODRecordList> allBindings, final ReferenceContext ref) {
this.ref = ref;
// set up the map
if ( allBindings.isEmpty() )
map = Collections.emptyMap(); map = Collections.emptyMap();
else else {
map = new HashMap<String, RODRecordList>(nBindings); Map<String, RODRecordList> tmap = new HashMap<String, RODRecordList>(allBindings.size());
for ( RODRecordList rod : allBindings ) {
if ( rod != null && ! rod.isEmpty() )
tmap.put(canonicalName(rod.getName()), rod);
}
// ensure that no one modifies the map itself
map = Collections.unmodifiableMap(tmap);
}
}
// ------------------------------------------------------------------------------------------
//
//
// Generic accessors
//
//
// ------------------------------------------------------------------------------------------
/**
* Gets all of the Tribble features spanning this locus, returning them as a list of specific
* type T extending Feature. This function looks across all tracks to find the Features, so
* if you have two tracks A and B each containing 1 Feature, then getValues will return
* a list containing both features.
*
* Note that this function assumes that all of the bound features are instances of or
* subclasses of T. A ClassCastException will occur if this isn't the case. If you want
* to get all Features without any danger of such an exception use the root Tribble
* interface Feature.
*
* @param type The type of the underlying objects bound here
* @param <T> as above
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Requires({"type != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Class<T> type) {
return addValues(map.keySet(), type, new ArrayList<T>(), null, false, false);
} }
/** /**
* get all the reference meta data associated with a track name. * Provides the same functionality as @link #getValues(Class<T>) but will only include
* @param name the name of the track we're looking for * Features that start at the GenomeLoc provided by onlyAtThisLoc.
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a
* dbSNP RMD this will be a RodDbSNP, etc.
* *
* Important: The list returned by this function is guaranteed not to be null, but may be empty! * @param type The type of the underlying objects bound here
* @param onlyAtThisLoc
* @param <T> as above
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/ */
public List<Object> getReferenceMetaData(final String name) { @Requires({"type != null", "onlyAtThisLoc != null"})
RODRecordList list = getTrackDataByName(name, true); @Ensures("result != null")
List<Object> objects = new ArrayList<Object>(); public <T extends Feature> List<T> getValues(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
if (list == null) return objects; return addValues(map.keySet(), type, new ArrayList<T>(), onlyAtThisLoc, true, false);
for (GATKFeature feature : list)
objects.add(feature.getUnderlyingObject());
return objects;
} }
/** /**
* get all the reference meta data associated with a track name. * Uses the same logic as @link #getValues(Class) but arbitrarily selects one of the resulting
* @param name the name of the track we're looking for * elements of the list to return. That is, if there would be two elements in the result of
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with * @link #getValues(Class), one of these two is selected, and which one it will be isn't
* the passed in parameter (false). * specified. Consequently, this method is only really safe if (1) you absolutely know
* @return a list of objects, representing the underlying objects that the tracks produce. I.e. for a * that only one binding will meet the constraints of @link #getValues(Class) or (2)
* dbSNP rod this will be a RodDbSNP, etc. * you truly don't care which of the multiple bindings available you are going to examine.
* *
* Important: The list returned by this function is guaranteed not to be null, but may be empty! * If there are no bindings here, getFirstValue() returns null
*
* @param type The type of the underlying objects bound here
* @param <T> as above
* @return A random single element the RODs bound here, or null if none are bound.
*/ */
public List<Object> getReferenceMetaData(final String name, boolean requireExactMatch) { @Requires({"type != null"})
RODRecordList list = getTrackDataByName(name, requireExactMatch); public <T extends Feature> T getFirstValue(final Class<T> type) {
List<Object> objects = new ArrayList<Object>(); return safeGetFirst(getValues(type));
if (list == null) return objects;
for (GATKFeature feature : list)
objects.add(feature.getUnderlyingObject());
return objects;
} }
/** /**
* get all the GATK features associated with a specific track name * Uses the same logic as @link #getValue(Class,GenomeLoc) to determine the list
* @param name the name of the track we're looking for * of eligible Features and @link #getFirstValue(Class) to select a single
* @param requireExactMatch do we require an exact match for the name (true) or do we require only that the name starts with * element from the interval list.
* the passed in parameter (false).
* @return a list of GATKFeatures for the target rmd
* *
* Important: The list returned by this function is guaranteed not to be null, but may be empty! * @param type The type of the underlying objects bound here
* @param <T> as above
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A random single element the RODs bound here starting at onlyAtThisLoc, or null if none are bound.
*/ */
public List<GATKFeature> getGATKFeatureMetaData(final String name, boolean requireExactMatch) { @Requires({"type != null", "onlyAtThisLoc != null"})
List<GATKFeature> feat = getTrackDataByName(name,requireExactMatch); public <T extends Feature> T getFirstValue(final Class<T> type, final GenomeLoc onlyAtThisLoc) {
return (feat == null) ? new ArrayList<GATKFeature>() : feat; // to satisfy the above requirement that we don't return null return safeGetFirst(getValues(type, onlyAtThisLoc));
} }
/** /**
* get a singleton record, given the name and a type. This function will return the first record at the current position seen, * Gets all of the Tribble features bound to RodBinding spanning this locus, returning them as
* and emit a logger warning if there were more than one option. * a list of specific type T extending Feature.
* *
* WARNING: this method is deprecated, since we now support more than one RMD at a single position for all tracks. If there are * subclasses of T. A ClassCastException will occur if this isn't the case.
* are multiple RMD objects at this location, there is no contract for which object this method will pick, and which object gets * subclasses of T. A ClassCastException will occur if this isn't the case.
* picked may change from time to time! BE WARNED! *
* * @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
* @param name the name of the track * @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
* @param clazz the underlying type to return * @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
* @param <T> the type to parameterize on, matching the clazz argument
* @return a record of type T, or null if no record is present.
*/ */
@Deprecated @Requires({"rodBinding != null"})
public <T> T lookup(final String name, Class<T> clazz) { @Ensures("result != null")
RODRecordList objects = getTrackDataByName(name, true); public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding) {
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), null, false, false);
}
// if empty or null return null; /**
if (objects == null || objects.size() < 1) return null; * Gets all of the Tribble features bound to any RodBinding in rodBindings,
* spanning this locus, returning them as a list of specific type T extending Feature.
*
* Note that this function assumes that all of the bound features are instances of or
* subclasses of T. A ClassCastException will occur if this isn't the case.
*
* @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Requires({"rodBindings != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings) {
List<T> results = new ArrayList<T>(1);
for ( RodBinding<T> rodBinding : rodBindings )
results.addAll(getValues(rodBinding));
return results;
}
if (objects.size() > 1) /**
logger.info("lookup is choosing the first record from " + (objects.size() - 1) + " options"); * The same logic as @link #getValues(RodBinding) but enforces that each Feature start at onlyAtThisLoc
*
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
return addValues(rodBinding.getName(), rodBinding.getType(), new ArrayList<T>(1), getTrackDataByName(rodBinding), onlyAtThisLoc, true, false);
}
Object obj = objects.get(0).getUnderlyingObject(); /**
if (!(clazz.isAssignableFrom(obj.getClass()))) * The same logic as @link #getValues(List) but enforces that each Feature start at onlyAtThisLoc
throw new UserException.CommandLineException("Unable to case track named " + name + " to type of " + clazz.toString() *
+ " it's of type " + obj.getClass()); * @param rodBindings Only Features coming from the tracks associated with one of rodBindings are fetched
* @param <T> The Tribble Feature type of the rodBinding, and consequently the type of the resulting list of Features
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A freshly allocated list of all of the bindings, or an empty list if none are bound.
*/
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
@Ensures("result != null")
public <T extends Feature> List<T> getValues(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
List<T> results = new ArrayList<T>(1);
for ( RodBinding<T> rodBinding : rodBindings )
results.addAll(getValues(rodBinding, onlyAtThisLoc));
return results;
}
return (T)obj; /**
* Uses the same logic as @getValues(RodBinding) to determine the list
* of eligible Features and select a single element from the resulting set
* of eligible features.
*
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
* @param <T> as above
* @return A random single element the eligible Features found, or null if none are bound.
*/
@Requires({"rodBinding != null"})
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding) {
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), null, false, true));
}
/**
* Uses the same logic as @getValues(RodBinding, GenomeLoc) to determine the list
* of eligible Features and select a single element from the resulting set
* of eligible features.
*
* @param rodBinding Only Features coming from the track associated with this rodBinding are fetched
* @param <T> as above
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A random single element the eligible Features found, or null if none are bound.
*/
@Requires({"rodBinding != null", "onlyAtThisLoc != null"})
public <T extends Feature> T getFirstValue(final RodBinding<T> rodBinding, final GenomeLoc onlyAtThisLoc) {
return safeGetFirst(addValues(rodBinding.getName(), rodBinding.getType(), null, getTrackDataByName(rodBinding), onlyAtThisLoc, true, true));
}
/**
* Uses the same logic as @getValues(List) to determine the list
* of eligible Features and select a single element from the resulting set
* of eligible features.
*
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
* @param <T> as above
* @return A random single element the eligible Features found, or null if none are bound.
*/
@Requires({"rodBindings != null"})
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings) {
for ( RodBinding<T> rodBinding : rodBindings ) {
T val = getFirstValue(rodBinding);
if ( val != null )
return val;
}
return null;
}
/**
* Uses the same logic as @getValues(RodBinding,GenomeLoc) to determine the list
* of eligible Features and select a single element from the resulting set
* of eligible features.
*
* @param rodBindings Only Features coming from the tracks associated with these rodBindings are fetched
* @param <T> as above
* @param onlyAtThisLoc only Features starting at this site are considered
* @return A random single element the eligible Features found, or null if none are bound.
*/
@Requires({"rodBindings != null", "onlyAtThisLoc != null"})
public <T extends Feature> T getFirstValue(final Collection<RodBinding<T>> rodBindings, final GenomeLoc onlyAtThisLoc) {
for ( RodBinding<T> rodBinding : rodBindings ) {
T val = getFirstValue(rodBinding, onlyAtThisLoc);
if ( val != null )
return val;
}
return null;
} }
/** /**
* Is there a binding at this site to a ROD/track with the specified name? * Is there a binding at this site to a ROD/track with the specified name?
* *
* @param name the name of the rod * @param rodBinding the rod binding we want to know about
* @return true if it has the rod * @return true if any Features are bound in this tracker to rodBinding
*/ */
public boolean hasROD(final String name) { @Requires({"rodBinding != null"})
return map.containsKey(canonicalName(name)); public boolean hasValues(final RodBinding rodBinding) {
} return map.containsKey(canonicalName(rodBinding.getName()));
/**
* Get all of the RMDs at the current site. The collection is "flattened": for any track that has multiple records
* at the current site, they all will be added to the list as separate elements.
*
* @return collection of all rods
*/
public Collection<GATKFeature> getAllRods() {
List<GATKFeature> l = new ArrayList<GATKFeature>();
for ( RODRecordList rl : map.values() ) {
if ( rl == null ) continue; // how do we get null value stored for a track? shouldn't the track be missing from the map altogether?
l.addAll(rl);
}
return l;
} }
/** /**
* Get all of the RMD tracks at the current site. Each track is returned as a single compound * Get all of the RMD tracks at the current site. Each track is returned as a single compound
* object (RODRecordList) that may contain multiple RMD records associated with the current site. * object (RODRecordList) that may contain multiple RMD records associated with the current site.
* *
* @return collection of all tracks * @return List of all tracks
*/ */
public Collection<RODRecordList> getBoundRodTracks() { public List<RODRecordList> getBoundRodTracks() {
LinkedList<RODRecordList> bound = new LinkedList<RODRecordList>(); return new ArrayList<RODRecordList>(map.values());
for ( RODRecordList value : map.values() ) {
if ( value != null && value.size() != 0 ) bound.add(value);
}
return bound;
} }
/** /**
* @return the number of ROD bindings (name -> value) where value is not empty in this tracker * The number of tracks with at least one value bound here
* @return the number of tracks with at least one bound Feature
*/ */
public int getNBoundRodTracks() { public int getNTracksWithBoundFeatures() {
return getNBoundRodTracks(null); return map.size();
} }
public int getNBoundRodTracks(final String excludeIn ) { // ------------------------------------------------------------------------------------------
final String exclude = excludeIn == null ? null : canonicalName(excludeIn); //
//
// old style accessors
//
// TODO -- DELETE ME
//
//
// ------------------------------------------------------------------------------------------
int n = 0; @Deprecated
for ( RODRecordList value : map.values() ) { public boolean hasValues(final String name) {
if ( value != null && ! value.isEmpty() ) { return map.containsKey(canonicalName(name));
if ( exclude == null || ! value.getName().equals(exclude) )
n++;
}
}
return n;
} }
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name) {
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), null, false, false);
}
@Deprecated
public <T extends Feature> List<T> getValues(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
return addValues(name, type, new ArrayList<T>(), getTrackDataByName(name), onlyAtThisLoc, true, false);
}
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name) {
return safeGetFirst(getValues(type, name));
}
@Deprecated
public <T extends Feature> T getFirstValue(final Class<T> type, final String name, final GenomeLoc onlyAtThisLoc) {
return safeGetFirst(getValues(type, name, onlyAtThisLoc));
}
// ------------------------------------------------------------------------------------------
//
//
// Private utility functions
//
//
// ------------------------------------------------------------------------------------------
/** /**
* Binds the list of reference ordered data records (RMDs) to track name at this site. Should be used only by the traversal * Helper function for getFirst() operations that takes a list of <T> and
* system to provide access to RMDs in a structured way to the walkers. * returns the first element, or null if no such element exists.
* *
* @param name the name of the track * @param l
* @param rod the collection of RMD data * @param <T>
*/
public void bind(final String name, RODRecordList rod) {
//logger.debug(String.format("Binding %s to %s", name, rod));
map.put(canonicalName(name), rod);
}
/**
* Converts all possible ROD tracks to VariantContexts objects, of all types, allowing any start and any number
* of entries per ROD.
* The name of each VariantContext corresponds to the ROD name.
*
* @param ref reference context
* @return variant context
*/
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref) {
return getAllVariantContexts(ref, null, null, false, false);
}
/**
* Returns all of the variant contexts that start at the current location
* @param ref
* @param curLocation
* @return * @return
*/ */
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, GenomeLoc curLocation) { @Requires({"l != null"})
return getAllVariantContexts(ref, null, curLocation, true, false); final private <T extends Feature> T safeGetFirst(final List<T> l) {
return l.isEmpty() ? null : l.get(0);
} }
/** private <T extends Feature> List<T> addValues(final Collection<String> names,
* Converts all possible ROD tracks to VariantContexts objects. If allowedTypes != null, then only final Class<T> type,
* VariantContexts in the allow set of types will be returned. If requireStartsHere is true, then curLocation List<T> values,
* must not be null, and only records whose start position is == to curLocation.getStart() will be returned. final GenomeLoc curLocation,
* If takeFirstOnly is true, then only a single VariantContext will be converted from any individual ROD. Of course, final boolean requireStartHere,
* this single object must pass the allowed types and start here options if provided. Note that the result final boolean takeFirstOnly ) {
* may return multiple VariantContexts with the same name if that particular track contained multiple RODs spanning
* the current location.
*
* The name of each VariantContext corresponds to the ROD name.
*
* @param ref reference context
* @param allowedTypes allowed types
* @param curLocation location
* @param requireStartHere do we require the rod to start at this location?
* @param takeFirstOnly do we take the first rod only?
* @return variant context
*/
public Collection<VariantContext> getAllVariantContexts(ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
List<VariantContext> contexts = new ArrayList<VariantContext>();
for ( RODRecordList rodList : getBoundRodTracks() ) {
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
return contexts;
}
/**
* Gets the variant contexts associated with track name name
*
* see getVariantContexts for more information.
*
* @param ref ReferenceContext to enable conversion to variant context
* @param name name
* @param curLocation location
* @param allowedTypes allowed types
* @param requireStartHere do we require the rod to start at this location?
* @param takeFirstOnly do we take the first rod only?
* @return variant context
*/
// public Collection<VariantContext> getVariantContexts(String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
// return getVariantContexts(null, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
// }
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
return getVariantContexts(ref, Arrays.asList(name), allowedTypes, curLocation, requireStartHere, takeFirstOnly);
}
// public Collection<VariantContext> getVariantContexts(Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
// return getVariantContexts(null, names, allowedTypes, curLocation, requireStartHere, takeFirstOnly);
// }
public Collection<VariantContext> getVariantContexts(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
for ( String name : names ) { for ( String name : names ) {
RODRecordList rodList = getTrackDataByName(name,true); // require that the name is an exact match RODRecordList rodList = getTrackDataByName(name); // require that the name is an exact match
values = addValues(name, type, values, rodList, curLocation, requireStartHere, takeFirstOnly );
if ( rodList != null ) if ( takeFirstOnly && ! values.isEmpty() )
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly ); break;
} }
return contexts; return values;
}
public Collection<VariantContext> getVariantContextsByPrefix(ReferenceContext ref, Collection<String> names, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
Collection<VariantContext> contexts = new ArrayList<VariantContext>();
for ( String name : names ) {
RODRecordList rodList = getTrackDataByName(name,false); // require that the name is an exact match
if ( rodList != null )
addVariantContexts(contexts, rodList, ref, allowedTypes, curLocation, requireStartHere, takeFirstOnly );
}
return contexts;
}
/**
* Gets the variant context associated with name, and assumes the system only has a single bound track at this location. Throws an exception if not.
* see getVariantContexts for more information.
*
* @param name name
* @param curLocation location
* @param allowedTypes allowed types
* @param requireStartHere do we require the rod to start at this location?
* @return variant context
*/
public VariantContext getVariantContext(ReferenceContext ref, String name, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere ) {
Collection<VariantContext> contexts = getVariantContexts(ref, name, allowedTypes, curLocation, requireStartHere, false );
if ( contexts.size() > 1 )
throw new ReviewedStingException("Requested a single VariantContext object for track " + name + " but multiple variants were present at position " + curLocation);
else if ( contexts.size() == 0 )
return null;
else
return contexts.iterator().next();
}
/**
* Very simple accessor that gets the first (and only!) VC associated with name at the current location, or
* null if there's no binding here.
*
* @param ref
* @param name
* @param curLocation
* @return
*/
public VariantContext getVariantContext(ReferenceContext ref, String name, GenomeLoc curLocation) {
return getVariantContext(ref, name, null, curLocation, true);
} }
private void addVariantContexts(Collection<VariantContext> contexts, RODRecordList rodList, ReferenceContext ref, EnumSet<VariantContext.Type> allowedTypes, GenomeLoc curLocation, boolean requireStartHere, boolean takeFirstOnly ) {
private <T extends Feature> List<T> addValues(final String name,
final Class<T> type,
List<T> values,
final RODRecordList rodList,
final GenomeLoc curLocation,
final boolean requireStartHere,
final boolean takeFirstOnly ) {
for ( GATKFeature rec : rodList ) { for ( GATKFeature rec : rodList ) {
if ( VariantContextAdaptors.canBeConvertedToVariantContext(rec.getUnderlyingObject()) ) { if ( ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart() ) { // ok, we are going to keep this thing
// ok, we might actually be able to turn this record in a variant context Object obj = rec.getUnderlyingObject();
VariantContext vc = VariantContextAdaptors.toVariantContext(rodList.getName(), rec.getUnderlyingObject(), ref); if (!(type.isAssignableFrom(obj.getClass())))
throw new UserException.CommandLineException("Unable to cast track named " + name + " to type of " + type.toString()
+ " it's of type " + obj.getClass());
if ( vc == null ) // sometimes the track has odd stuff in it that can't be converted T objT = (T)obj;
continue; if ( takeFirstOnly ) {
if ( values == null )
values = Arrays.asList(objT);
else
values.add(objT);
// now, let's decide if we want to keep it break;
boolean goodType = allowedTypes == null || allowedTypes.contains(vc.getType()); } else {
boolean goodPos = ! requireStartHere || rec.getLocation().getStart() == curLocation.getStart(); if ( values == null )
values = new ArrayList<T>();
if ( goodType && goodPos ) { // ok, we are going to keep this thing values.add(objT);
contexts.add(vc);
if ( takeFirstOnly )
// we only want the first passing instance, so break the loop over records in rodList
break;
} }
} }
} }
return values == null ? Collections.<T>emptyList() : values;
} }
/** /**
* Finds the reference metadata track named 'name' and returns all ROD records from that track associated * Finds the reference metadata track named 'name' and returns all ROD records from that track associated
* with the current site as a RODRecordList collection object. If no data track with specified name is available, * with the current site as a RODRecordList List object. If no data track with specified name is available,
* returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up * returns defaultValue wrapped as RODRecordList object. NOTE: if defaultValue is null, it will be wrapped up
* with track name set to 'name' and location set to null; otherwise the wrapper object will have name and * with track name set to 'name' and location set to null; otherwise the wrapper object will have name and
* location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution, * location set to defaultValue.getName() and defaultValue.getLocation(), respectively (use caution,
@ -367,29 +423,16 @@ public class RefMetaDataTracker {
* for instance, on locus traversal, location is usually expected to be a single base we are currently looking at, * for instance, on locus traversal, location is usually expected to be a single base we are currently looking at,
* regardless of the presence of "extended" RODs overlapping with that location). * regardless of the presence of "extended" RODs overlapping with that location).
* @param name track name * @param name track name
* @param requireExactMatch do we require an exact match of the rod name?
* @return track data for the given rod * @return track data for the given rod
*/ */
private RODRecordList getTrackDataByName(final String name, boolean requireExactMatch) { private RODRecordList getTrackDataByName(final String name) {
//logger.debug(String.format("Lookup %s%n", name));
final String luName = canonicalName(name); final String luName = canonicalName(name);
RODRecordList trackData = null; RODRecordList l = map.get(luName);
return l == null ? EMPTY_ROD_RECORD_LIST : l;
}
if ( requireExactMatch ) { private RODRecordList getTrackDataByName(final RodBinding binding) {
if ( map.containsKey(luName) ) return getTrackDataByName(binding.getName());
trackData = map.get(luName);
} else {
for ( Map.Entry<String, RODRecordList> datum : map.entrySet() ) {
final String rodName = datum.getKey();
if ( datum.getValue() != null && rodName.startsWith(luName) ) {
if ( trackData == null ) trackData = new RODRecordListImpl(name);
//System.out.printf("Adding bindings from %s to %s at %s%n", rodName, name, datum.getValue().getLocation());
((RODRecordListImpl)trackData).add(datum.getValue(), true);
}
}
}
return trackData;
} }
/** /**
@ -398,6 +441,7 @@ public class RefMetaDataTracker {
* @return canonical name of the rod * @return canonical name of the rod
*/ */
private final String canonicalName(final String name) { private final String canonicalName(final String name) {
// todo -- remove me after switch to RodBinding syntax
return name.toLowerCase(); return name.toLowerCase();
} }
} }
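Standalone analogue (hypothetical names, no GATK dependencies) of the tracker's new pattern shown above: values are stored per canonical track name and are only returned as a typed list after an isAssignableFrom() check, as addValues() does.

import java.util.*;

public class TypedTrackDemo {
    private final Map<String, List<Object>> map = new HashMap<>();

    public void bind(String name, Object value) {
        map.computeIfAbsent(name.toLowerCase(), k -> new ArrayList<>()).add(value);
    }

    @SuppressWarnings("unchecked")
    public <T> List<T> getValues(Class<T> type, String name) {
        List<T> out = new ArrayList<>();
        for (Object o : map.getOrDefault(name.toLowerCase(), Collections.<Object>emptyList())) {
            if (!type.isAssignableFrom(o.getClass()))
                throw new IllegalArgumentException("track " + name + " is not of type " + type);
            out.add((T) o); // safe after the assignability check
        }
        return out;
    }

    public static void main(String[] args) {
        TypedTrackDemo tracker = new TypedTrackDemo();
        tracker.bind("dbsnp", "rs123"); // stand-in for a Tribble Feature
        System.out.println(tracker.getValues(String.class, "dbsnp")); // [rs123]
    }
}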

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.*;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* Class for representing arbitrary reference ordered data sets
* <p/>
* User: mdepristo
* Date: Feb 27, 2009
* Time: 10:47:14 AM
* To change this template use File | Settings | File Templates.
*/
public class ReferenceOrderedData<ROD extends ReferenceOrderedDatum> implements Iterable<ReferenceOrderedDatum> {
private String name;
private File file = null;
// private String fieldDelimiter;
/** Header object returned from the datum */
// private Object header = null;
private Class<ROD> type = null; // runtime type information for object construction
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(ReferenceOrderedData.class);
/**
* given an existing file, open it and append all the valid triplet lines to an existing list
*
* @param rodTripletList the list of existing triplets
* @param filename the file to attempt to extract ROD triplets from
*/
protected static void extractRodsFromFile(List<String> rodTripletList, String filename) {
BufferedReader str;
try {
str = new BufferedReader(new FileReader(new File(filename)));
} catch (FileNotFoundException e) {
throw new UserException.CouldNotReadInputFile(new File(filename), "Unable to load the ROD input file", e);
}
String line = "NO LINES READ IN";
try {
while ((line = str.readLine()) != null) {
if (line.matches(".+,.+,.+")) rodTripletList.add(line.trim());
else logger.warn("the following file line didn't parse into a triplet -> " + line);
}
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(new File(filename), "Failed reading the input rod file; last line read was " + line, e);
}
}
// ----------------------------------------------------------------------
//
// Constructors
//
// ----------------------------------------------------------------------
public ReferenceOrderedData(final String name, File file, Class<ROD> type ) {
this.name = name;
this.file = file;
this.type = type;
// this.header = initializeROD(name, file, type);
// this.fieldDelimiter = newROD(name, type).delimiterRegex();
}
public String getName() { return name; }
public File getFile() { return file; }
public Class<ROD> getType() { return type; }
/**
* Special equals override to see if this ROD is compatible with the given
* name and type. 'Compatible' means that this ROD has the name that's passed
* in and its data can fit into the container specified by type.
*
* @param name Name to check.
* @param type Type to check.
*
* @return True if these parameters imply this rod. False otherwise.
*/
public boolean matches(String name, Class<? extends ReferenceOrderedDatum> type) {
return this.name.equals(name) && type.isAssignableFrom(this.type);
}
public Iterator<ReferenceOrderedDatum> iterator() {
Iterator<ReferenceOrderedDatum> it;
try {
Method m = type.getDeclaredMethod("createIterator", String.class, java.io.File.class);
it = (Iterator<ReferenceOrderedDatum>) m.invoke(null, name, file);
} catch (java.lang.NoSuchMethodException e) {
it = new RODRecordIterator(file,name,type);
} catch (java.lang.NullPointerException e) {
throw new RuntimeException(e);
} catch (java.lang.SecurityException e) {
throw new RuntimeException(e);
} catch (java.lang.IllegalAccessException e) {
throw new RuntimeException(e);
} catch (java.lang.IllegalArgumentException e) {
throw new RuntimeException(e);
} catch (java.lang.reflect.InvocationTargetException e) {
throw new RuntimeException(e);
}
// return new RODIterator<ROD>(it);
return it;
}
// ----------------------------------------------------------------------
//
// Manipulations of all of the data
//
// ----------------------------------------------------------------------
public static void write(ArrayList<ReferenceOrderedDatum> data, File output) throws IOException {
final FileWriter out = new FileWriter(output);
for (ReferenceOrderedDatum rec : data) {
out.write(rec.repl() + "\n");
}
out.close();
}
}
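Standalone analogue of the reflective lookup performed by the deleted iterator() method above: prefer a static createIterator factory if the type declares one, otherwise fall back to a default path. The classes below are illustrative only, and the factory signature is simplified to a single String argument.

import java.lang.reflect.Method;
import java.util.Arrays;
import java.util.Iterator;

public class FactoryLookupDemo {
    public static class WithFactory {
        public static Iterator<String> createIterator(String name) {
            return Arrays.asList(name + "-1", name + "-2").iterator();
        }
    }

    @SuppressWarnings("unchecked")
    static Iterator<String> iteratorFor(Class<?> type, String name) {
        try {
            Method m = type.getDeclaredMethod("createIterator", String.class);
            return (Iterator<String>) m.invoke(null, name); // static factory found by reflection
        } catch (NoSuchMethodException e) {
            return Arrays.asList(name + "-default").iterator(); // fallback when no factory exists
        } catch (ReflectiveOperationException e) {
            throw new RuntimeException(e);
        }
    }

    public static void main(String[] args) {
        iteratorFor(WithFactory.class, "track").forEachRemaining(System.out::println);
    }
}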

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata;
import java.io.File;
/**
* An interface marking that a given Tribble codec can look at the file and determine whether the
* codec is specifically capable of parsing the contents of the file.
*/
public interface SelfScopingFeatureCodec {
/**
* This function returns true iff the File potentialInput can be parsed by this
* codec.
*
* The GATK assumes that there's never a situation where two SelfScopingFeatureCodecs
* return true for the same file. If this occurs the GATK spits out an error.
*
* Note this function must never throw an error. All errors should be trapped
* and false returned.
*
* @param potentialInput the file to test for parsability with this codec
* @return true if potentialInput can be parsed, false otherwise
*/
public boolean canDecode(final File potentialInput);
}
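A hedged sketch of what an implementation of the new interface might look like: decide by peeking at the file and trap every error so canDecode() never throws. The codec class and the header token it looks for are hypothetical; a real codec would also implement SelfScopingFeatureCodec alongside its Tribble codec interface.

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;

public class ExampleSelfScopingCodec {
    public boolean canDecode(final File potentialInput) {
        try (BufferedReader in = new BufferedReader(new FileReader(potentialInput))) {
            String first = in.readLine();
            return first != null && first.startsWith("##fileformat=EXAMPLE"); // hypothetical signature line
        } catch (Exception e) {
            return false; // never propagate errors out of canDecode
        }
    }
}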

View File

@ -1,12 +1,13 @@
package org.broadinstitute.sting.gatk.refdata; package org.broadinstitute.sting.gatk.refdata;
import net.sf.samtools.util.SequenceUtil;
import org.broad.tribble.Feature; import org.broad.tribble.Feature;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.annotation.Strand;
import org.broad.tribble.dbsnp.OldDbSNPFeature;
import org.broad.tribble.gelitext.GeliTextFeature; import org.broad.tribble.gelitext.GeliTextFeature;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.hapmap.HapMapFeature; import org.broadinstitute.sting.utils.codecs.hapmap.RawHapMapFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.variantcontext.*; import org.broadinstitute.sting.utils.variantcontext.*;
@ -92,28 +93,89 @@ public class VariantContextAdaptors {
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
private static class DBSnpAdaptor implements VCAdaptor { private static class DBSnpAdaptor implements VCAdaptor {
private static boolean isSNP(OldDbSNPFeature feature) {
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
}
private static boolean isMNP(OldDbSNPFeature feature) {
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
}
private static boolean isInsertion(OldDbSNPFeature feature) {
return feature.getVariantType().contains("insertion");
}
private static boolean isDeletion(OldDbSNPFeature feature) {
return feature.getVariantType().contains("deletion");
}
private static boolean isIndel(OldDbSNPFeature feature) {
return isInsertion(feature) || isDeletion(feature) || isComplexIndel(feature);
}
public static boolean isComplexIndel(OldDbSNPFeature feature) {
return feature.getVariantType().contains("in-del");
}
/**
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guaranteed ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return an alternate allele list
*/
public static List<String> getAlternateAlleleList(OldDbSNPFeature feature) {
List<String> ret = new ArrayList<String>();
for (String allele : getAlleleList(feature))
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
return ret;
}
/**
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
*
* @return an allele list, with the reference allele first
*/
public static List<String> getAlleleList(OldDbSNPFeature feature) {
List<String> alleleList = new ArrayList<String>();
// add ref first
if ( feature.getStrand() == Strand.POSITIVE )
alleleList = Arrays.asList(feature.getObserved());
else
for (String str : feature.getObserved())
alleleList.add(SequenceUtil.reverseComplement(str));
if ( alleleList.size() > 0 && alleleList.contains(feature.getNCBIRefBase())
&& !alleleList.get(0).equals(feature.getNCBIRefBase()) )
Collections.swap(alleleList, alleleList.indexOf(feature.getNCBIRefBase()), 0);
return alleleList;
}
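To make the allele ordering concrete, a small illustrative sketch (not part of the commit; these helpers are static members of the private DBSnpAdaptor class, so the calls are shown as if from within it, and the feature values are invented):
// Suppose feature is an OldDbSNPFeature with NCBI reference base "A" and
// observed alleles {"T", "C"} recorded on the negative strand.
List<String> alleles    = getAlleleList(feature);           // ["A", "G"]: reverse complemented, reference first
List<String> altAlleles = getAlternateAlleleList(feature);  // ["G"]: the reference base is filtered out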
/** /**
* Converts non-VCF formatted dbSNP records to VariantContext. * Converts non-VCF formatted dbSNP records to VariantContext.
* @return DbSNPFeature. * @return OldDbSNPFeature.
*/ */
@Override @Override
public Class<? extends Feature> getAdaptableFeatureType() { return DbSNPFeature.class; } public Class<? extends Feature> getAdaptableFeatureType() { return OldDbSNPFeature.class; }
@Override @Override
public VariantContext convert(String name, Object input, ReferenceContext ref) { public VariantContext convert(String name, Object input, ReferenceContext ref) {
DbSNPFeature dbsnp = (DbSNPFeature)input; OldDbSNPFeature dbsnp = (OldDbSNPFeature)input;
if ( ! Allele.acceptableAlleleBases(DbSNPHelper.getReference(dbsnp)) ) if ( ! Allele.acceptableAlleleBases(dbsnp.getNCBIRefBase()) )
return null; return null;
Allele refAllele = Allele.create(DbSNPHelper.getReference(dbsnp), true); Allele refAllele = Allele.create(dbsnp.getNCBIRefBase(), true);
if ( DbSNPHelper.isSNP(dbsnp) || DbSNPHelper.isIndel(dbsnp) || DbSNPHelper.isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) { if ( isSNP(dbsnp) || isIndel(dbsnp) || isMNP(dbsnp) || dbsnp.getVariantType().contains("mixed") ) {
// add the reference allele // add the reference allele
List<Allele> alleles = new ArrayList<Allele>(); List<Allele> alleles = new ArrayList<Allele>();
alleles.add(refAllele); alleles.add(refAllele);
// add all of the alt alleles // add all of the alt alleles
boolean sawNullAllele = false; boolean sawNullAllele = refAllele.isNull();
for ( String alt : DbSNPHelper.getAlternateAlleleList(dbsnp) ) { for ( String alt : getAlternateAlleleList(dbsnp) ) {
if ( ! Allele.acceptableAlleleBases(alt) ) { if ( ! Allele.acceptableAlleleBases(alt) ) {
//System.out.printf("Excluding dbsnp record %s%n", dbsnp); //System.out.printf("Excluding dbsnp record %s%n", dbsnp);
return null; return null;
@ -127,14 +189,13 @@ public class VariantContextAdaptors {
Map<String, Object> attributes = new HashMap<String, Object>(); Map<String, Object> attributes = new HashMap<String, Object>();
attributes.put(VariantContext.ID_KEY, dbsnp.getRsID()); attributes.put(VariantContext.ID_KEY, dbsnp.getRsID());
if ( sawNullAllele ) { int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1; if ( index < 0 )
if ( index < 0 ) return null; // we weren't given enough reference context to create the VariantContext
return null; // we weren't given enough reference context to create the VariantContext Byte refBaseForIndel = new Byte(ref.getBases()[index]);
attributes.put(VariantContext.REFERENCE_BASE_FOR_INDEL_KEY, new Byte(ref.getBases()[index]));
} Map<String, Genotype> genotypes = null;
Collection<Genotype> genotypes = null; VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0), dbsnp.getEnd() - (refAllele.isNull() ? 1 : 0), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes, refBaseForIndel);
VariantContext vc = new VariantContext(name, dbsnp.getChr(), dbsnp.getStart() - (sawNullAllele ? 1 : 0),dbsnp.getEnd(), alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attributes);
return vc; return vc;
} else } else
return null; // can't handle anything else return null; // can't handle anything else
@ -164,16 +225,6 @@ public class VariantContextAdaptors {
@Override @Override
public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; } public Class<? extends Feature> getAdaptableFeatureType() { return GeliTextFeature.class; }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
/** /**
* convert to a Variant Context, given: * convert to a Variant Context, given:
* @param name the name of the ROD * @param name the name of the ROD
@ -237,17 +288,7 @@ public class VariantContextAdaptors {
* @return HapMapFeature. * @return HapMapFeature.
*/ */
@Override @Override
public Class<? extends Feature> getAdaptableFeatureType() { return HapMapFeature.class; } public Class<? extends Feature> getAdaptableFeatureType() { return RawHapMapFeature.class; }
/**
* convert to a Variant Context, given:
* @param name the name of the ROD
* @param input the Rod object, in this case a RodGeliText
* @return a VariantContext object
*/
// VariantContext convert(String name, Object input) {
// return convert(name, input, null);
// }
/** /**
* convert to a Variant Context, given: * convert to a Variant Context, given:
@ -261,7 +302,12 @@ public class VariantContextAdaptors {
if ( ref == null ) if ( ref == null )
throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context"); throw new UnsupportedOperationException("Conversion from HapMap to VariantContext requires a reference context");
HapMapFeature hapmap = (HapMapFeature)input; RawHapMapFeature hapmap = (RawHapMapFeature)input;
int index = hapmap.getStart() - ref.getWindow().getStart();
if ( index < 0 )
return null; // we weren't given enough reference context to create the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);
HashSet<Allele> alleles = new HashSet<Allele>(); HashSet<Allele> alleles = new HashSet<Allele>();
Allele refSNPAllele = Allele.create(ref.getBase(), true); Allele refSNPAllele = Allele.create(ref.getBase(), true);
@ -271,7 +317,7 @@ public class VariantContextAdaptors {
// use the actual alleles, if available // use the actual alleles, if available
if ( alleleMap != null ) { if ( alleleMap != null ) {
alleles.addAll(alleleMap.values()); alleles.addAll(alleleMap.values());
Allele deletionAllele = alleleMap.get(HapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases) Allele deletionAllele = alleleMap.get(RawHapMapFeature.INSERTION); // yes, use insertion here (since we want the reference bases)
if ( deletionAllele != null && deletionAllele.isReference() ) if ( deletionAllele != null && deletionAllele.isReference() )
deletionLength = deletionAllele.length(); deletionLength = deletionAllele.length();
} else { } else {
@ -321,7 +367,7 @@ public class VariantContextAdaptors {
long end = hapmap.getEnd(); long end = hapmap.getEnd();
if ( deletionLength > 0 ) if ( deletionLength > 0 )
end += deletionLength; end += deletionLength;
VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs); VariantContext vc = new VariantContext(name, hapmap.getChr(), hapmap.getStart(), end, alleles, genotypes, VariantContext.NO_NEG_LOG_10PERROR, null, attrs, refBaseForIndel);
return vc; return vc;
} }
} }
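Untangling the interleaved old/new lines above, the reference-base lookup that both adaptors now perform can be read as the following sketch (variable names mirror the dbSNP adaptor; the point is that the base immediately before the event is taken from the reference window, and the conversion is abandoned when the window does not reach far enough left):
// Sketch of the indel reference-base lookup (dbSNP variant shown).
int index = dbsnp.getStart() - ref.getWindow().getStart() - 1;  // base just before the event
if ( index < 0 )
    return null;  // not enough reference context to anchor the VariantContext
Byte refBaseForIndel = new Byte(ref.getBases()[index]);         // passed as the final VariantContext constructor argument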

View File

@ -1,193 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.exception.CodecLineParsingException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.StringTokenizer;
public class AnnotatorInputTableCodec implements ReferenceDependentFeatureCodec<AnnotatorInputTableFeature> {
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
public static final String DELIMITER = "\t";
private ArrayList<String> header;
/**
* The parser to use when resolving genome-wide locations.
*/
private GenomeLocParser genomeLocParser;
/**
* Set the parser to use when resolving genetic data.
* @param genomeLocParser The supplied parser.
*/
public void setGenomeLocParser(GenomeLocParser genomeLocParser) {
this.genomeLocParser = genomeLocParser;
}
/**
* Parses the header.
*
* @param reader
*
* @return The header fields parsed from the file.
*/
public Object readHeader(LineReader reader)
{
int[] lineCounter = new int[1];
try {
header = readHeader(reader, lineCounter);
} catch(IOException e) {
throw new IllegalArgumentException("Unable to read from file.", e);
}
return header;
}
public Class<AnnotatorInputTableFeature> getFeatureType() {
return AnnotatorInputTableFeature.class;
}
@Override
public Feature decodeLoc(String line) {
StringTokenizer st = new StringTokenizer(line, DELIMITER);
if ( st.countTokens() < 1 )
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
GenomeLoc loc;
String chr = st.nextToken();
if ( chr.indexOf(":") != -1 ) {
loc = genomeLocParser.parseGenomeLoc(chr);
} else {
if ( st.countTokens() < 3 )
throw new CodecLineParsingException("Couldn't parse GenomeLoc out of the following line because there aren't enough tokens.\nLine: " + line);
loc = genomeLocParser.createGenomeLoc(chr, Integer.valueOf(st.nextToken()), Integer.valueOf(st.nextToken()));
}
return new AnnotatorInputTableFeature(loc.getContig(), loc.getStart(), loc.getStop());
}
/**
* Parses the line into an AnnotatorInputTableFeature object.
*
* @param line
*/
public AnnotatorInputTableFeature decode(String line) {
final ArrayList<String> header = this.header; //optimization
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
if ( values.size() != header.size()) {
throw new CodecLineParsingException(String.format("Encountered a line that has %d columns while the header has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
}
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
for ( int i = 0; i < header.size(); i++ ) {
feature.putColumnValue(header.get(i), values.get(i));
}
GenomeLoc loc;
if ( values.get(0).indexOf(":") != -1 )
loc = genomeLocParser.parseGenomeLoc(values.get(0));
else
loc = genomeLocParser.createGenomeLoc(values.get(0), Integer.valueOf(values.get(1)), Integer.valueOf(values.get(2)));
//parse the location
feature.setChr(loc.getContig());
feature.setStart((int)loc.getStart());
feature.setEnd((int)loc.getStop());
return feature;
}
/**
* Returns the header.
* @param source
* @return
* @throws IOException
*/
public static ArrayList<String> readHeader(final File source) throws IOException {
FileInputStream is = new FileInputStream(source);
try {
return readHeader(new AsciiLineReader(is), null);
} finally {
is.close();
}
}
/**
* Returns the header, and also sets the 2nd arg to the number of lines in the header.
* @param source
* @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
* @return The header fields.
* @throws IOException
*/
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
ArrayList<String> header = null;
int numLines = 0;
//find the 1st line that's non-empty and not a comment
String line = null;
while( (line = source.readLine()) != null ) {
numLines++;
if ( line.trim().isEmpty() || line.startsWith("#") ) {
continue;
}
//parse the header
header = Utils.split(line, DELIMITER);
break;
}
// check that we found the header
if ( header == null ) {
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
}
if(lineCounter != null) {
lineCounter[0] = numLines;
}
logger.debug(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
return header;
}
}

View File

@ -1,158 +0,0 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import org.broad.tribble.Feature;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* This class represents a single record in an AnnotatorInputTable.
*/
public class AnnotatorInputTableFeature implements Feature {
private ArrayList<String> columnNames;
private HashMap<String, String> columnValues; // maps column names to column values
private String chr;
private int start;
private int end;
private String strRep = null;
/**
* Constructor.
* @param chr The chromosome name.
* @param start The start position
* @param end The end position
*/
public AnnotatorInputTableFeature(String chr, int start, int end) {
this.chr = chr;
this.start = start;
this.end = end;
}
/**
* Constructor.
* @param columnNames The column names as parsed out of the file header.
*/
public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
this.columnNames = columnNames;
this.columnValues = new HashMap<String, String>();
}
/**
* @return the list of column names from the file header.
*/
public ArrayList<String> getHeader() {
return columnNames;
}
/**
* Returns the value of the given column.
*
* @param columnName The column name as it appears in the file header.
* @return The value
*/
public String getColumnValue(final String columnName) {
return columnValues.get(columnName);
}
public boolean containsColumnName(final String columnName) {
return columnValues.containsKey(columnName);
}
/**
* Sets the value for the given column.
*
* @param columnName The column name as it appears in the file header.
* @param value The value
* @return The existing value associated with the columnName, if there is one.
*/
protected String putColumnValue(final String columnName, final String value) {
return columnValues.put(columnName, value);
}
/**
* @return all values in this line, hashed by their column names.
*/
public Map<String,String> getColumnValues() {
return Collections.unmodifiableMap(columnValues);
}
public String getChr() {
return chr;
}
public int getStart() {
return start;
}
public int getEnd() {
return end;
}
protected void setChr(String chr) {
this.chr = chr;
}
protected void setStart(int start) {
this.start = start;
}
protected void setEnd(int end) {
this.end = end;
}
@Override
public String toString() {
if ( strRep == null ) {
StringBuilder sb = new StringBuilder();
for(String columnName : columnNames ) {
if ( sb.length() == 0 )
sb.append("[");
else
sb.append(", ");
sb.append(columnName + "=" + columnValues.get(columnName));
}
sb.append("]");
strRep = sb.toString();
}
return strRep;
}
}

View File

@ -12,14 +12,13 @@ import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.builders.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile; import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.util.Map;
/** /**
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory * a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
@ -83,14 +82,14 @@ public class RMDIndexer extends CommandLineProgram {
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL); RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
// find the types available to the track builders // find the types available to the track builders
Map<String,Class> typeMapping = builder.getAvailableTrackNamesAndTypes(); FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
// check that the type is valid // check that the type is valid
if (!typeMapping.containsKey(inputFileType)) if (descriptor == null)
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + Utils.join(",",typeMapping.keySet())); throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
// create the codec // create the codec
FeatureCodec codec = builder.createByType(typeMapping.get(inputFileType)); FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
// check if it's a reference dependent feature codec // check if it's a reference dependent feature codec
if (codec instanceof ReferenceDependentFeatureCodec) if (codec instanceof ReferenceDependentFeatureCodec)
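Read together, the new RMDIndexer flow above boils down to the following hedged sketch (variable names assumed from the surrounding diff; "vcf" is only an example type string):
// Sketch of the descriptor-based codec creation shown above.
FeatureManager manager = builder.getFeatureManager();
FeatureManager.FeatureDescriptor descriptor = manager.getByName(inputFileType);  // e.g. "vcf"
if ( descriptor == null )
    throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: "
            + manager.userFriendlyListOfAvailableFeatures());
FeatureCodec codec = manager.createCodec(descriptor, "foo", genomeLocParser);
// createCodec() names the codec (if NameAwareCodec) and wires in the GenomeLocParser
// for ReferenceDependentFeatureCodecs (see FeatureManager.createCodec below).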

View File

@ -0,0 +1,255 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.NameAwareCodec;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.help.GATKDocUtils;
import org.broadinstitute.sting.utils.help.HelpUtils;
import javax.mail.Header;
import java.io.File;
import java.util.*;
/**
* Class for managing Tribble Feature readers available to the GATK. The features
* are dynamically determined via a PluginManager. This class provides convenient
* getter methods for obtaining FeatureDescriptor objects that collect all of the
* useful information about the Tribble Codec, Feature, and name in one place.
*
* @author depristo
*/
public class FeatureManager {
public static class FeatureDescriptor implements Comparable<FeatureDescriptor> {
final String name;
final FeatureCodec codec;
public FeatureDescriptor(final String name, final FeatureCodec codec) {
this.name = name;
this.codec = codec;
}
public String getName() {
return name;
}
public String getSimpleFeatureName() { return getFeatureClass().getSimpleName(); }
public FeatureCodec getCodec() {
return codec;
}
public Class getCodecClass() { return codec.getClass(); }
public Class getFeatureClass() { return codec.getFeatureType(); }
@Override
public String toString() {
return String.format("FeatureDescriptor name=%s codec=%s feature=%s",
getName(), getCodecClass().getName(), getFeatureClass().getName());
}
@Override
public int compareTo(FeatureDescriptor o) {
return getName().compareTo(o.getName());
}
}
private final PluginManager<FeatureCodec> pluginManager;
private final Collection<FeatureDescriptor> featureDescriptors = new TreeSet<FeatureDescriptor>();
/**
* Construct a FeatureManager
*/
public FeatureManager() {
pluginManager = new PluginManager<FeatureCodec>(FeatureCodec.class, "Codecs", "Codec");
for (final String rawName: pluginManager.getPluginsByName().keySet()) {
FeatureCodec codec = pluginManager.createByName(rawName);
String name = rawName.toUpperCase();
FeatureDescriptor featureDescriptor = new FeatureDescriptor(name, codec);
featureDescriptors.add(featureDescriptor);
}
}
/**
* Return the FeatureDescriptor whose getCodecClass().equals(codecClass).
*
* @param codecClass
* @return A FeatureDescriptor or null if none is found
*/
@Requires("codecClass != null")
public FeatureDescriptor getByCodec(Class codecClass) {
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getCodecClass().equals(codecClass) )
return descriptor;
return null;
}
/**
* Returns a collection of FeatureDescriptors that emit records of type featureClass
*
* @param featureClass
* @return A collection of matching FeatureDescriptors; empty if none are found
*/
@Requires("featureClass != null")
public <T extends Feature> Collection<FeatureDescriptor> getByFeature(Class<T> featureClass) {
Set<FeatureDescriptor> consistentDescriptors = new TreeSet<FeatureDescriptor>();
if (featureClass == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for ( FeatureDescriptor descriptor : featureDescriptors ) {
if ( featureClass.isAssignableFrom(descriptor.getFeatureClass()))
consistentDescriptors.add(descriptor);
}
return consistentDescriptors;
}
/**
* Return the FeatureDescriptor with getName().equals(name)
*
* @param name
* @return A FeatureDescriptor or null if none is found
*/
@Requires("name != null")
public FeatureDescriptor getByName(String name) {
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getName().equalsIgnoreCase(name) )
return descriptor;
return null;
}
/**
* Returns the FeatureDescriptor that can read the contents of File file, if one can be determined
*
* @param file
* @return A FeatureDescriptor or null if none is found
*/
@Requires({"file != null", "file.isFile()", "file.canRead()"})
public FeatureDescriptor getByFiletype(File file) {
List<FeatureDescriptor> canParse = new ArrayList<FeatureDescriptor>();
for ( FeatureDescriptor descriptor : featureDescriptors )
if ( descriptor.getCodec() instanceof SelfScopingFeatureCodec ) {
if ( ((SelfScopingFeatureCodec) descriptor.getCodec()).canDecode(file) ) {
canParse.add(descriptor);
}
}
if ( canParse.size() == 0 )
return null;
else if ( canParse.size() > 1 )
throw new ReviewedStingException("BUG: multiple feature descriptors can read file " + file + ": " + canParse);
else
return canParse.get(0);
}
/**
* Returns the FeatureDescriptor associated with the type described by triplet, or null if none is found
* @param triplet
* @return
*/
@Requires("triplet != null")
public FeatureDescriptor getByTriplet(RMDTriplet triplet) {
return getByName(triplet.getType());
}
/**
* @return all of the FeatureDescriptors available to the GATK. Never null
*/
@Ensures("result != null")
public Collection<FeatureDescriptor> getFeatureDescriptors() {
return Collections.unmodifiableCollection(featureDescriptors);
}
/**
* Returns a user-friendly listing of the available tribble track names (vcf, dbsnp, etc.) that we can load
* @return
*/
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures() {
return userFriendlyListOfAvailableFeatures(Feature.class);
}
/**
* Returns a user-friendly listing of the available tribble track names (vcf, dbsnp, etc.) that we can load,
* restricted to only codecs producing Features consistent with the requiredFeatureType
* @return
*/
@Ensures("result != null")
public String userFriendlyListOfAvailableFeatures(Class<? extends Feature> requiredFeatureType) {
final String nameHeader="Name", featureHeader = "FeatureType", docHeader="Documentation";
int maxNameLen = nameHeader.length(), maxFeatureNameLen = featureHeader.length();
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
maxNameLen = Math.max(maxNameLen, descriptor.getName().length());
maxFeatureNameLen = Math.max(maxFeatureNameLen, descriptor.getSimpleFeatureName().length());
}
}
StringBuilder docs = new StringBuilder();
String format = "%" + maxNameLen + "s %" + maxFeatureNameLen + "s %s%n";
docs.append(String.format(format, nameHeader, featureHeader, docHeader));
for ( final FeatureDescriptor descriptor : featureDescriptors ) {
if ( requiredFeatureType.isAssignableFrom(descriptor.getFeatureClass()) ) {
String oneDoc = String.format(format,
descriptor.getName(),
descriptor.getSimpleFeatureName(),
GATKDocUtils.helpLinksToGATKDocs(descriptor.getCodecClass()));
docs.append(oneDoc);
}
}
return docs.toString();
}
/**
* Create a new FeatureCodec of the type described in descriptor, assigning it the
* name (if possible) and providing it the genomeLocParser (where necessary)
*
* @param descriptor FeatureDescriptor of the Tribble FeatureCodec we want to create
* @param name the name to assign this codec
* @return the feature codec itself
*/
@Requires({"descriptor != null", "name != null", "genomeLocParser != null"})
@Ensures("result != null")
public FeatureCodec createCodec(FeatureDescriptor descriptor, String name, GenomeLocParser genomeLocParser) {
FeatureCodec codex = pluginManager.createByType(descriptor.getCodecClass());
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if ( codex instanceof ReferenceDependentFeatureCodec )
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
}
}
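A brief usage sketch of the new FeatureManager (illustration only; the file path, track name, and GenomeLocParser are placeholders, and "VCF" stands for any registered codec name):
// Hypothetical FeatureManager usage.
FeatureManager manager = new FeatureManager();
// Look up a descriptor by registered name (case-insensitive)...
FeatureManager.FeatureDescriptor byName = manager.getByName("VCF");
// ...or let self-scoping codecs identify the file themselves.
FeatureManager.FeatureDescriptor byFile = manager.getByFiletype(new File("calls.vcf"));
if ( byFile != null ) {
    // The returned codec is named and, if reference dependent, handed the GenomeLocParser.
    FeatureCodec codec = manager.createCodec(byFile, "mycalls", genomeLocParser);
}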

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.io.IOException;
/**
* @author aaron
* <p/>
* Interface QueryableTrack
* <p/>
* a decorator interface for tracks that are queryable
*/
public interface QueryableTrack {
public CloseableIterator<GATKFeature> query(final GenomeLoc interval) throws IOException;
public CloseableIterator<GATKFeature> query(final GenomeLoc interval, final boolean contained) throws IOException;
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop) throws IOException;
public CloseableIterator<GATKFeature> query(final String contig, final int start, final int stop, final boolean contained) throws IOException;
public void close();
}

View File

@ -25,8 +25,12 @@ package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.util.CloseableIterator; import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource; import org.broad.tribble.FeatureSource;
import org.broad.tribble.iterators.CloseableTribbleIterator;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator; import org.broadinstitute.sting.gatk.refdata.utils.FeatureToGATKFeatureIterator;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature; import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -45,10 +49,10 @@ import java.io.IOException;
* the basics of what a reference metadata track must contain. * the basics of what a reference metadata track must contain.
*/ */
public class RMDTrack { public class RMDTrack {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
// the basics of a track: // the basics of a track:
private final Class type; // our type private final Class type; // our type
private final Class recordType; // the underlying records that are produced by this track
private final String name; // the name private final String name; // the name
private final File file; // the associated file we create the reader from private final File file; // the associated file we create the reader from
@ -90,7 +94,6 @@ public class RMDTrack {
*/ */
public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) { public RMDTrack(Class type, String name, File file, FeatureSource reader, SAMSequenceDictionary dict, GenomeLocParser genomeLocParser, FeatureCodec codec) {
this.type = type; this.type = type;
this.recordType = codec.getFeatureType();
this.name = name; this.name = name;
this.file = file; this.file = file;
this.reader = reader; this.reader = reader;
@ -112,19 +115,10 @@ public class RMDTrack {
} }
public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException { public CloseableIterator<GATKFeature> query(GenomeLoc interval) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName()); CloseableTribbleIterator<Feature> iter = reader.query(interval.getContig(),interval.getStart(),interval.getStop());
} if ( RMDTrackBuilder.MEASURE_TRIBBLE_QUERY_PERFORMANCE )
logger.warn("Query " + getName() + ":" + ((PerformanceLoggingFeatureSource)reader).getPerformanceLog());
public CloseableIterator<GATKFeature> query(GenomeLoc interval, boolean contained) throws IOException { return new FeatureToGATKFeatureIterator(genomeLocParser, iter, this.getName());
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(interval.getContig(),interval.getStart(),interval.getStop()),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
}
public CloseableIterator<GATKFeature> query(String contig, int start, int stop, boolean contained) throws IOException {
return new FeatureToGATKFeatureIterator(genomeLocParser,reader.query(contig,start,stop),this.getName());
} }
public void close() { public void close() {

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2010 The Broad Institute * Copyright (c) 2011, The Broad Institute
* *
* Permission is hereby granted, free of charge, to any person * Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation * obtaining a copy of this software and associated documentation
@ -12,37 +12,36 @@
* *
* The above copyright notice and this permission notice shall be * The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software. * included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE. * OTHER DEALINGS IN THE SOFTWARE.
*/ */
package org.broadinstitute.sting.gatk.refdata.tracks.builders; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.*; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
import org.broad.tribble.Tribble;
import org.broad.tribble.TribbleException;
import org.broad.tribble.index.Index; import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory; import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.source.BasicFeatureSource; import org.broad.tribble.source.BasicFeatureSource;
import org.broad.tribble.source.PerformanceLoggingFeatureSource;
import org.broad.tribble.util.LittleEndianOutputStream; import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Tags; import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackCreationException;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils; import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -53,7 +52,10 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.*; import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
@ -67,17 +69,16 @@ import java.util.*;
* that gets iterators from the FeatureReader using Tribble. * that gets iterators from the FeatureReader using Tribble.
* *
*/ */
public class RMDTrackBuilder extends PluginManager<FeatureCodec> { public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
/** /**
* our log, which we use to capture anything from this class * our log, which we use to capture anything from this class
*/ */
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list // a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:"; public static final String SequenceDictionaryPropertyPredicate = "DICT:";
private Map<String, Class> classes = null;
// private sequence dictionary we use to set our tracks with // private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null; private SAMSequenceDictionary dict = null;
@ -91,6 +92,8 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
*/ */
private ValidationExclusion.TYPE validationExclusionType; private ValidationExclusion.TYPE validationExclusionType;
FeatureManager featureManager;
/** /**
* Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally * Construct an RMDTrackerBuilder, allowing the user to define tracks to build after-the-fact. This is generally
* used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor, * used when walkers want to directly manage the ROD system for whatever reason. Before using this constructor,
@ -102,29 +105,14 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
public RMDTrackBuilder(SAMSequenceDictionary dict, public RMDTrackBuilder(SAMSequenceDictionary dict,
GenomeLocParser genomeLocParser, GenomeLocParser genomeLocParser,
ValidationExclusion.TYPE validationExclusionType) { ValidationExclusion.TYPE validationExclusionType) {
super(FeatureCodec.class, "Codecs", "Codec");
this.dict = dict; this.dict = dict;
this.genomeLocParser = genomeLocParser;
this.validationExclusionType = validationExclusionType; this.validationExclusionType = validationExclusionType;
this.genomeLocParser = genomeLocParser;
classes = new HashMap<String, Class>(); featureManager = new FeatureManager();
for (String name: this.getPluginsByName().keySet()) {
classes.put(name.toUpperCase(), getPluginsByName().get(name));
} }
/** @return a list of all available track types we currently have access to create */
public Map<String, Class> getAvailableTrackNamesAndTypes() {
return Collections.unmodifiableMap(classes);
} }
/** @return a list of all available track record types we currently have access to create */ public FeatureManager getFeatureManager() {
public Map<String, Class> getAvailableTrackNamesAndRecordTypes() { return featureManager;
HashMap classToRecord = new HashMap<String, Class>();
for (String name: this.getPluginsByName().keySet()) {
FeatureCodec codec = this.createByName(name);
classToRecord.put(name, codec.getFeatureType());
}
return classToRecord;
} }
/** /**
@ -133,45 +121,38 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
* @param fileDescriptor a description of the type of track to build. * @param fileDescriptor a description of the type of track to build.
* *
* @return an instance of the track * @return an instance of the track
* @throws RMDTrackCreationException
* if we don't know of the target class or we couldn't create it
*/ */
public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) throws RMDTrackCreationException { public RMDTrack createInstanceOfTrack(RMDTriplet fileDescriptor) {
String name = fileDescriptor.getName(); String name = fileDescriptor.getName();
File inputFile = new File(fileDescriptor.getFile()); File inputFile = new File(fileDescriptor.getFile());
Class featureCodecClass = getAvailableTrackNamesAndTypes().get(fileDescriptor.getType().toUpperCase()); FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByTriplet(fileDescriptor);
if (featureCodecClass == null) if (descriptor == null)
throw new UserException.BadArgumentValue("-B",fileDescriptor.getType()); throw new UserException.BadArgumentValue("-B",fileDescriptor.getType());
// return a feature reader track // return a feature reader track
Pair<FeatureSource, SAMSequenceDictionary> pair; Pair<FeatureSource, SAMSequenceDictionary> pair;
if (inputFile.getAbsolutePath().endsWith(".gz")) if (inputFile.getAbsolutePath().endsWith(".gz"))
pair = createTabixIndexedFeatureSource(featureCodecClass, name, inputFile); pair = createTabixIndexedFeatureSource(descriptor, name, inputFile);
else else
pair = getFeatureSource(featureCodecClass, name, inputFile, fileDescriptor.getStorageType()); pair = getFeatureSource(descriptor, name, inputFile, fileDescriptor.getStorageType());
if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file"); if (pair == null) throw new UserException.CouldNotReadInputFile(inputFile, "Unable to make the feature reader for input file");
return new RMDTrack(featureCodecClass, name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(featureCodecClass,name)); return new RMDTrack(descriptor.getCodecClass(), name, inputFile, pair.first, pair.second, genomeLocParser, createCodec(descriptor, name));
} }
/** /**
* Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream. * Convenience method simplifying track creation. Assume unnamed track based on a file rather than a stream.
* @param targetClass Type of Tribble class to build. * @param codecClass Type of Tribble codec class to build.
* @param inputFile Input file type to use. * @param inputFile Input file type to use.
* @return An RMDTrack, suitable for accessing reference metadata. * @return An RMDTrack, suitable for accessing reference metadata.
*/ */
public RMDTrack createInstanceOfTrack(Class targetClass, File inputFile) { public RMDTrack createInstanceOfTrack(Class codecClass, File inputFile) {
// TODO: Update RMDTriplet to contain an actual class object rather than a name to avoid these gymnastics. final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
String typeName = null;
for(Map.Entry<String,Class> trackType: getAvailableTrackNamesAndTypes().entrySet()) {
if(trackType.getValue().equals(targetClass))
typeName = trackType.getKey();
}
if(typeName == null) if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for class " + targetClass.getName()); throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",typeName,inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
} }
/** /**
@ -179,16 +160,16 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
* reader of the appropriate type will figure out what the right index type is, and determine if it * reader of the appropriate type will figure out what the right index type is, and determine if it
* exists. * exists.
* *
* @param targetClass the codec class type * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the track * @param name the name of the track
* @param inputFile the file to load * @param inputFile the file to load
* @return a feature reader implementation * @return a feature reader implementation
*/ */
private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(Class targetClass, String name, File inputFile) { private Pair<FeatureSource, SAMSequenceDictionary> createTabixIndexedFeatureSource(FeatureManager.FeatureDescriptor descriptor, String name, File inputFile) {
// we might not know the index type, try loading with the default reader constructor // we might not know the index type, try loading with the default reader constructor
logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file"); logger.info("Attempting to blindly load " + inputFile + " as a tabix indexed file");
try { try {
return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(targetClass, name)),null); return new Pair<FeatureSource, SAMSequenceDictionary>(BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(), createCodec(descriptor, name)),null);
} catch (TribbleException e) { } catch (TribbleException e) {
throw new UserException(e.getMessage(), e); throw new UserException(e.getMessage(), e);
} }
@ -196,28 +177,26 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
/** /**
* add a name to the codec, if it takes one * add a name to the codec, if it takes one
* @param targetClass the class to create a codec for * @param descriptor the class to create a codec for
* @param name the name to assign this codec * @param name the name to assign this codec
* @return the feature codec itself * @return the feature codec itself
*/ */
public FeatureCodec createCodec(Class targetClass, String name) { private FeatureCodec createCodec(FeatureManager.FeatureDescriptor descriptor, String name) {
FeatureCodec codex = this.createByType(targetClass); return featureManager.createCodec(descriptor, name, genomeLocParser);
if ( codex instanceof NameAwareCodec )
((NameAwareCodec)codex).setName(name);
if(codex instanceof ReferenceDependentFeatureCodec)
((ReferenceDependentFeatureCodec)codex).setGenomeLocParser(genomeLocParser);
return codex;
} }
/** /**
* create a feature source object given: * create a feature source object given:
* @param targetClass the target class * @param descriptor the FeatureDescriptor describing the FeatureCodec we want to create
* @param name the name of the codec * @param name the name of the codec
* @param inputFile the tribble file to parse * @param inputFile the tribble file to parse
* @param storageType How the RMD is streamed into the input file. * @param storageType How the RMD is streamed into the input file.
* @return the input file as a FeatureReader * @return the input file as a FeatureReader
*/ */
private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(Class targetClass, String name, File inputFile, RMDStorageType storageType) { private Pair<FeatureSource, SAMSequenceDictionary> getFeatureSource(FeatureManager.FeatureDescriptor descriptor,
String name,
File inputFile,
RMDStorageType storageType) {
// Feature source and sequence dictionary to use as the ultimate reference // Feature source and sequence dictionary to use as the ultimate reference
FeatureSource featureSource = null; FeatureSource featureSource = null;
SAMSequenceDictionary sequenceDictionary = null; SAMSequenceDictionary sequenceDictionary = null;
@ -227,7 +206,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
if(canBeIndexed) { if(canBeIndexed) {
try { try {
Index index = loadIndex(inputFile, createCodec(targetClass, name)); Index index = loadIndex(inputFile, createCodec(descriptor, name));
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { } catch (ReviewedStingException e) { }
@ -240,7 +219,10 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
sequenceDictionary = getSequenceDictionaryFromProperties(index); sequenceDictionary = getSequenceDictionaryFromProperties(index);
} }
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(targetClass, name)); if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
featureSource = new PerformanceLoggingFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
else
featureSource = new BasicFeatureSource(inputFile.getAbsolutePath(), index, createCodec(descriptor, name));
} }
catch (TribbleException e) { catch (TribbleException e) {
throw new UserException(e.getMessage()); throw new UserException(e.getMessage());
@ -250,7 +232,7 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
} }
} }
else { else {
featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(targetClass, name),false); featureSource = BasicFeatureSource.getFeatureSource(inputFile.getAbsolutePath(),createCodec(descriptor, name),false);
} }
return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary); return new Pair<FeatureSource,SAMSequenceDictionary>(featureSource,sequenceDictionary);
@ -385,22 +367,6 @@ public class RMDTrackBuilder extends PluginManager<FeatureCodec> {
return idx; return idx;
} }
/**
* Returns a collection of track names that match the record type.
* @param trackRecordType the record type specified in the @RMD annotation
* @return a collection of available track record type names that match the record type
*/
public Collection<String> getTrackRecordTypeNames(Class trackRecordType) {
Set<String> names = new TreeSet<String>();
if (trackRecordType == null)
throw new IllegalArgumentException("trackRecordType value is null, please pass in an actual class object");
for (Map.Entry<String, Class> availableTrackRecordType: getAvailableTrackNamesAndRecordTypes().entrySet()) {
if (availableTrackRecordType.getValue() != null && trackRecordType.isAssignableFrom(availableTrackRecordType.getValue()))
names.add(availableTrackRecordType.getKey());
}
return names;
}
// --------------------------------------------------------------------------------------------------------- // ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes // static functions to work with the sequence dictionaries of indexes
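A hedged usage sketch of the reworked RMDTrackBuilder (the sequence dictionary, GenomeLocParser, file path, and codec class are placeholders; VCFCodec stands for any codec registered with the FeatureManager):
// Illustration only.
RMDTrackBuilder builder = new RMDTrackBuilder(dict, genomeLocParser, ValidationExclusion.TYPE.ALL);
// Build a track from an RMDTriplet describing name, type, and file...
RMDTrack track = builder.createInstanceOfTrack(
        new RMDTriplet("mycalls", "VCF", "calls.vcf", RMDStorageType.FILE, new Tags()));
// ...or directly from a codec class, yielding an anonymous track.
RMDTrack anonymous = builder.createInstanceOfTrack(VCFCodec.class, new File("calls.vcf"));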

View File

@ -1,45 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
*
* @author aaron
*
* Class RMDTrackCreationException
*
* if we fail for some reason to make a track, throw this exception
*/
public class RMDTrackCreationException extends ReviewedStingException {
public RMDTrackCreationException(String msg) {
super(msg);
}
public RMDTrackCreationException(String message, Throwable throwable) {
super(message, throwable);
}
}
View File
@ -57,6 +57,7 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
public abstract GenomeLoc getLocation(); public abstract GenomeLoc getLocation();
// TODO: this should be a Feature
public abstract Object getUnderlyingObject(); public abstract Object getUnderlyingObject();
/** /**
@ -98,48 +99,9 @@ public abstract class GATKFeature implements Feature, HasGenomeLocation {
return feature.getEnd(); return feature.getEnd();
} }
// TODO: this should be a Feature, actually
public Object getUnderlyingObject() { public Object getUnderlyingObject() {
return feature; return feature;
} }
} }
/**
* wrapping a old style rod into the new GATK feature style
*/
public static class RODGATKFeature extends GATKFeature {
// our data
private ReferenceOrderedDatum datum;
public RODGATKFeature(ReferenceOrderedDatum datum) {
super(datum.getName());
this.datum = datum;
}
@Override
public GenomeLoc getLocation() {
return datum.getLocation();
}
@Override
public Object getUnderlyingObject() {
return datum;
}
@Override
public String getChr() {
return datum.getLocation().getContig();
}
@Override
public int getStart() {
return (int)datum.getLocation().getStart();
}
@Override
public int getEnd() {
return (int)datum.getLocation().getStop();
}
}
} }
View File
@ -1,65 +0,0 @@
/*
* Copyright (c) 2010. The Broad Institute
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.utils;
import net.sf.samtools.util.CloseableIterator;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import java.util.Iterator;
/**
*
* @author aaron
*
* Class GATKFeatureIterator
*
* Takes a RODatum iterator and makes it an iterator of GATKFeatures. Shazam!
*/
public class GATKFeatureIterator implements CloseableIterator<GATKFeature> {
private final Iterator<ReferenceOrderedDatum> iter;
public GATKFeatureIterator(Iterator<ReferenceOrderedDatum> iter) {
this.iter = iter;
}
@Override
public boolean hasNext() {
return iter.hasNext();
}
@Override
public GATKFeature next() {
return new GATKFeature.RODGATKFeature(iter.next());
}
@Override
public void remove() {
throw new UnsupportedOperationException("Remove not supported");
}
@Override
public void close() {
// do nothing, our underlying iterator doesn't support this
}
}
View File
@ -1,190 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils.helpers;
import net.sf.samtools.util.SequenceUtil;
import org.broad.tribble.annotation.Strand;
import org.broad.tribble.dbsnp.DbSNPFeature;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
/**
* this class contains static helper methods for DbSNP
*/
public class DbSNPHelper {
public static final String STANDARD_DBSNP_TRACK_NAME = "dbsnp";
private DbSNPHelper() {} // don't make a DbSNPHelper
public static DbSNPFeature getFirstRealSNP(List<Object> dbsnpList) {
if (dbsnpList == null)
return null;
DbSNPFeature dbsnp = null;
for (Object d : dbsnpList) {
if (d instanceof DbSNPFeature && DbSNPHelper.isSNP((DbSNPFeature)d)) {
dbsnp = (DbSNPFeature) d;
break;
}
}
return dbsnp;
}
public static String rsIDOfFirstRealSNP(List<Object> featureList) {
if (featureList == null)
return null;
String rsID = null;
for ( Object d : featureList ) {
if ( d instanceof DbSNPFeature ) {
if ( DbSNPHelper.isSNP((DbSNPFeature)d) ) {
rsID = ((DbSNPFeature)d).getRsID();
break;
}
} else if ( d instanceof VariantContext) {
if ( ((VariantContext)d).isSNP() ) {
rsID = ((VariantContext)d).getID();
break;
}
}
}
return rsID;
}
public static String rsIDOfFirstRealIndel(List<Object> featureList) {
if (featureList == null)
return null;
String rsID = null;
for ( Object d : featureList ) {
if ( d instanceof DbSNPFeature ) {
if ( DbSNPHelper.isIndel((DbSNPFeature)d) ) {
rsID = ((DbSNPFeature)d).getRsID();
break;
}
} else if ( d instanceof VariantContext) {
if ( ((VariantContext)d).isIndel() ) {
rsID = ((VariantContext)d).getID();
break;
}
}
}
return rsID;
}
/**
* get the -1 * (log 10 of the error value)
*
* @return the log based error estimate
*/
public static double getNegLog10PError(DbSNPFeature feature) {
return 4; // -log10(0.0001)
}
//
// What kind of variant are we?
//
// ----------------------------------------------------------------------
public static boolean isSNP(DbSNPFeature feature) {
return feature.getVariantType().contains("single") && feature.getLocationType().contains("exact");
}
public static boolean isMNP(DbSNPFeature feature) {
return feature.getVariantType().contains("mnp") && feature.getLocationType().contains("range");
}
public static String toMediumString(DbSNPFeature feature) {
String s = String.format("%s:%d:%s:%s", feature.getChr(), feature.getStart(), feature.getRsID(), Utils.join("",feature.getObserved()));
if (isSNP(feature)) s += ":SNP";
if (isIndel(feature)) s += ":Indel";
if (isHapmap(feature)) s += ":Hapmap";
if (is2Hit2Allele(feature)) s += ":2Hit";
return s;
}
public static boolean isInsertion(DbSNPFeature feature) {
return feature.getVariantType().contains("insertion");
}
public static boolean isDeletion(DbSNPFeature feature) {
return feature.getVariantType().contains("deletion");
}
public static boolean isIndel(DbSNPFeature feature) {
return DbSNPHelper.isInsertion(feature) || DbSNPHelper.isDeletion(feature) || DbSNPHelper.isComplexIndel(feature);
}
public static boolean isComplexIndel(DbSNPFeature feature) {
return feature.getVariantType().contains("in-del");
}
public static boolean isHapmap(DbSNPFeature feature) {
return feature.getValidationStatus().contains("by-hapmap");
}
public static boolean is2Hit2Allele(DbSNPFeature feature) {
return feature.getValidationStatus().contains("by-2hit-2allele");
}
public static boolean is1000genomes(DbSNPFeature feature) {
return feature.getValidationStatus().contains("by-1000genomes");
}
public static boolean isMQ1(DbSNPFeature feature) {
return feature.getWeight() == 1;
}
/**
* gets the alternate alleles. This method should return all the alleles present at the location,
* NOT including the reference base. This is returned as a string list with no guarantee ordering
* of alleles (i.e. the first alternate allele is not always going to be the allele with the greatest
* frequency).
*
* @return an alternate allele list
*/
public static List<String> getAlternateAlleleList(DbSNPFeature feature) {
List<String> ret = new ArrayList<String>();
for (String allele : getAlleleList(feature))
if (!allele.equals(String.valueOf(feature.getNCBIRefBase()))) ret.add(allele);
return ret;
}
public static boolean onFwdStrand(DbSNPFeature feature) {
return feature.getStrand() == Strand.POSITIVE;
}
public static String getReference(DbSNPFeature feature) {
return feature.getNCBIRefBase();
}
public static String toSimpleString(DbSNPFeature feature) {
return String.format("%s:%s:%s", feature.getRsID(), feature.getObserved(), (feature.getStrand() == Strand.POSITIVE) ? "+" : "-");
}
/**
* gets the alleles. This method should return all the alleles present at the location,
* including the reference base. The first allele should always be the reference allele, followed
* by an unordered list of alternate alleles.
*
* @return an alternate allele list
*/
public static List<String> getAlleleList(DbSNPFeature feature) {
List<String> alleleList = new ArrayList<String>();
// add ref first
if ( onFwdStrand(feature) )
alleleList = Arrays.asList(feature.getObserved());
else
for (String str : feature.getObserved())
alleleList.add(SequenceUtil.reverseComplement(str));
if ( alleleList.size() > 0 && alleleList.contains(getReference(feature)) && !alleleList.get(0).equals(getReference(feature)) )
Collections.swap(alleleList, alleleList.indexOf(getReference(feature)), 0);
return alleleList;
}
}
View File
@ -1,21 +1,25 @@
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException; import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.text.TextFormattingUtils;
import java.io.*; import java.io.*;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap; import java.util.TreeMap;
/** /**
* Container class for GATK report tables * Container class for GATK report tables
*/ */
public class GATKReport { public class GATKReport {
private TreeMap<String, GATKReportTable> tables; public static final String GATKREPORT_HEADER_PREFIX = "##:GATKReport.v";
private TreeMap<String, GATKReportTable> tables = new TreeMap<String, GATKReportTable>();
/** /**
* Create a new, empty GATKReport. * Create a new, empty GATKReport.
*/ */
public GATKReport() { public GATKReport() {
tables = new TreeMap<String, GATKReportTable>();
} }
/** /**
@ -23,7 +27,7 @@ public class GATKReport {
* @param filename the path to the file to load * @param filename the path to the file to load
*/ */
public GATKReport(String filename) { public GATKReport(String filename) {
loadReport(new File(filename)); this(new File(filename));
} }
/** /**
@ -31,7 +35,6 @@ public class GATKReport {
* @param file the file to load * @param file the file to load
*/ */
public GATKReport(File file) { public GATKReport(File file) {
tables = new TreeMap<String, GATKReportTable>();
loadReport(file); loadReport(file);
} }
@ -46,11 +49,17 @@ public class GATKReport {
GATKReportTable table = null; GATKReportTable table = null;
String[] header = null; String[] header = null;
int id = 0; int id = 0;
GATKReportVersion version = null;
List<Integer> columnStarts = null;
String line; String line;
while ( (line = reader.readLine()) != null ) { while ( (line = reader.readLine()) != null ) {
if (line.startsWith("##:GATKReport.v0.1 ")) {
line = line.replaceFirst("##:GATKReport.v0.1 ", ""); if (line.startsWith(GATKREPORT_HEADER_PREFIX)) {
version = GATKReportVersion.fromHeader(line);
line = line.replaceFirst("##:GATKReport." + version.versionString + " ", "");
String[] pieces = line.split(" : "); String[] pieces = line.split(" : ");
String tableName = pieces[0]; String tableName = pieces[0];
@ -58,14 +67,35 @@ public class GATKReport {
addTable(tableName, tableDesc); addTable(tableName, tableDesc);
table = getTable(tableName); table = getTable(tableName);
table.setVersion(version);
header = null; header = null;
} else if ( line.isEmpty() ) { columnStarts = null;
} else if ( line.trim().isEmpty() ) {
// do nothing // do nothing
} else { } else {
if (table != null) { if (table != null) {
String[] splitLine;
switch (version) {
case V0_1:
splitLine = TextFormattingUtils.splitWhiteSpace(line);
break;
case V0_2:
if (header == null) {
columnStarts = TextFormattingUtils.getWordStarts(line);
}
splitLine = TextFormattingUtils.splitFixedWidth(line, columnStarts);
break;
default:
throw new ReviewedStingException("GATK report version parsing not implemented for: " + line);
}
if (header == null) { if (header == null) {
header = line.split("\\s+"); header = splitLine;
table.addPrimaryKey("id", false); table.addPrimaryKey("id", false);
@ -75,10 +105,8 @@ public class GATKReport {
id = 0; id = 0;
} else { } else {
String[] entries = line.split("\\s+");
for (int columnIndex = 0; columnIndex < header.length; columnIndex++) { for (int columnIndex = 0; columnIndex < header.length; columnIndex++) {
table.set(id, header[columnIndex], entries[columnIndex]); table.set(id, header[columnIndex], splitLine[columnIndex]);
} }
id++; id++;
@ -125,7 +153,10 @@ public class GATKReport {
* @return the table object * @return the table object
*/ */
public GATKReportTable getTable(String tableName) { public GATKReportTable getTable(String tableName) {
return tables.get(tableName); GATKReportTable table = tables.get(tableName);
if (table == null)
throw new ReviewedStingException("Table is not in GATKReport: " + tableName);
return table;
} }
/** /**
@ -140,4 +171,8 @@ public class GATKReport {
} }
} }
} }
public Collection<GATKReportTable> getTables() {
return tables.values();
}
} }
View File
@ -37,10 +37,10 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
* tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero * tables, as the table gets written properly without having to waste storage for the unset elements (usually the zero
* values) in the table. * values) in the table.
* *
* @param primaryKey the primary key position in the column that should be set * @param primaryKey the primary key position in the column that should be retrieved
* @return the value at the specified position in the column, or the default value if the element is not set * @return the value at the specified position in the column, or the default value if the element is not set
*/ */
public Object getWithoutSideEffects(Object primaryKey) { private Object getWithoutSideEffects(Object primaryKey) {
if (!this.containsKey(primaryKey)) { if (!this.containsKey(primaryKey)) {
return defaultValue; return defaultValue;
} }
@ -48,6 +48,16 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return this.get(primaryKey); return this.get(primaryKey);
} }
/**
* Return the string value of an element in the column; if the element is not set, return the string value of the default value.
*
* @param primaryKey the primary key position in the column that should be retrieved
* @return the string value at the specified position in the column, or the default value if the element is not set
*/
public String getStringValue(Object primaryKey) {
return toString(getWithoutSideEffects(primaryKey));
}
/** /**
* Return the displayable property of the column. If true, the column will be displayed in the final output. * Return the displayable property of the column. If true, the column will be displayed in the final output.
* If not, printing will be suppressed for the contents of the table. * If not, printing will be suppressed for the contents of the table.
@ -67,7 +77,7 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
for (Object obj : this.values()) { for (Object obj : this.values()) {
if (obj != null) { if (obj != null) {
int width = obj.toString().length(); int width = toString(obj).length();
if (width > maxWidth) { if (width > maxWidth) {
maxWidth = width; maxWidth = width;
@ -77,4 +87,27 @@ public class GATKReportColumn extends TreeMap<Object, Object> {
return maxWidth; return maxWidth;
} }
/**
* Returns a string version of the value, formatting floating point values to a fixed precision.
* @param obj The object to convert to a string
* @return The string representation of the object
*/
private static String toString(Object obj) {
String value;
if (obj == null) {
value = "null";
} else if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
return value;
}
public String getColumnName() {
return columnName;
}
} }
View File
@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import java.util.*;
/**
* Maintains GATKReportColumn objects in insertion order, keyed by column name and retrievable by index.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
/**
* Returns the column by index
* @param i the index
* @return The column
*/
public GATKReportColumn getByIndex(int i) {
return get(columnNames.get(i));
}
@Override
public GATKReportColumn remove(Object key) {
columnNames.remove(key);
return super.remove(key);
}
@Override
public GATKReportColumn put(String key, GATKReportColumn value) {
columnNames.add(key);
return super.put(key, value);
}
}
View File
@ -1,83 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
public class GATKReportParser {
private List<GATKReportTableParser> tables = new ArrayList<GATKReportTableParser>();
public void parse(File file) throws IOException {
InputStream stream = FileUtils.openInputStream(file);
try {
parse(stream);
} finally {
IOUtils.closeQuietly(stream);
}
}
public void parse(InputStream input) throws IOException {
GATKReportTableParser table = null;
for (String line: new XReadLines(input)) {
if (line.startsWith("##:GATKReport.v0.1 ")) {
table = newTableParser(line);
tables.add(table);
table.parse(line);
} else if (table != null) {
if (line.trim().length() == 0)
table = null;
else
table.parse(line);
}
}
}
public String getValue(String tableName, String[] key, String column) {
for (GATKReportTableParser table: tables)
if (table.getTableName().equals(tableName))
return table.getValue(key, column);
return null;
}
public String getValue(String tableName, String key, String column) {
for (GATKReportTableParser table: tables)
if (table.getTableName().equals(tableName))
return table.getValue(key, column);
return null;
}
private GATKReportTableParser newTableParser(String header) {
return new GATKReportTableParser();
}
}
View File
@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.ObjectUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.PrintStream; import java.io.PrintStream;
@ -88,17 +89,22 @@ import java.util.regex.Pattern;
* but at least the prototype contained herein works. * but at least the prototype contained herein works.
* *
* @author Kiran Garimella * @author Kiran Garimella
* @author Khalid Shakir
*/ */
public class GATKReportTable { public class GATKReportTable {
/** REGEX that matches any table with an invalid name */
public final static String INVALID_TABLE_NAME_REGEX = "[^a-zA-Z0-9_\\-\\.]";
private static final GATKReportVersion LATEST_REPORT_VERSION = GATKReportVersion.V0_2;
private String tableName; private String tableName;
private String tableDescription; private String tableDescription;
private GATKReportVersion version = LATEST_REPORT_VERSION;
private String primaryKeyName; private String primaryKeyName;
private Collection<Object> primaryKeyColumn; private Collection<Object> primaryKeyColumn;
private boolean primaryKeyDisplay; private boolean primaryKeyDisplay;
boolean sortByPrimaryKey = true; private boolean sortByPrimaryKey = true;
private LinkedHashMap<String, GATKReportColumn> columns; private GATKReportColumns columns;
/** /**
* Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed * Verifies that a table or column name has only alphanumeric characters - no spaces or special characters allowed
@ -107,12 +113,25 @@ public class GATKReportTable {
* @return true if the name is valid, false if otherwise * @return true if the name is valid, false if otherwise
*/ */
private boolean isValidName(String name) { private boolean isValidName(String name) {
Pattern p = Pattern.compile("[^a-zA-Z0-9_\\-\\.]"); Pattern p = Pattern.compile(INVALID_TABLE_NAME_REGEX);
Matcher m = p.matcher(name); Matcher m = p.matcher(name);
return !m.find(); return !m.find();
} }
/**
* Verifies that a table description contains no newline characters
*
* @param description the description of the table
* @return true if the description is valid, false if otherwise
*/
private boolean isValidDescription(String description) {
Pattern p = Pattern.compile("\\r|\\n");
Matcher m = p.matcher(description);
return !m.find();
}
/** /**
* Construct a new GATK report table with the specified name and description * Construct a new GATK report table with the specified name and description
* *
@ -128,11 +147,23 @@ public class GATKReportTable {
throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed."); throw new ReviewedStingException("Attempted to set a GATKReportTable name of '" + tableName + "'. GATKReportTable names must be purely alphanumeric - no spaces or special characters are allowed.");
} }
if (!isValidDescription(tableDescription)) {
throw new ReviewedStingException("Attempted to set a GATKReportTable description of '" + tableDescription + "'. GATKReportTable descriptions must not contain newlines.");
}
this.tableName = tableName; this.tableName = tableName;
this.tableDescription = tableDescription; this.tableDescription = tableDescription;
this.sortByPrimaryKey = sortByPrimaryKey; this.sortByPrimaryKey = sortByPrimaryKey;
columns = new LinkedHashMap<String, GATKReportColumn>(); columns = new GATKReportColumns();
}
public GATKReportVersion getVersion() {
return version;
}
protected void setVersion(GATKReportVersion version) {
this.version = version;
} }
/** /**
@ -161,6 +192,57 @@ public class GATKReportTable {
primaryKeyDisplay = display; primaryKeyDisplay = display;
} }
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
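* <p>
* A hypothetical usage sketch (the file, table and column names are made up, and it assumes the
* table's {@code get(primaryKey, columnName)} accessor):
* <pre>
*   GATKReport report = new GATKReport("report.grp");                      // hypothetical file name
*   GATKReportTable table = report.getTable("CountVariants");              // hypothetical table name
*   Object key = table.getPrimaryKey("dbsnp.eval.called.all.novel.all");
*   Object nSNPs = table.get(key, "nSNPs");                                // hypothetical column name
* </pre>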
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or throws an exception.
*/
public Object getPrimaryKey(String dottedColumnValues) {
Object key = findPrimaryKey(dottedColumnValues);
if (key == null)
throw new ReviewedStingException("Attempted to get non-existent GATKReportTable key for values: " + dottedColumnValues);
return key;
}
/**
* Returns true if there is at least one row with the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
* @param dottedColumnValues Period concatenated values.
* @return true if there is at least one row matching the columns.
*/
public boolean containsPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues) != null;
}
/**
* Returns the first primary key matching the dotted column values.
* Ex: dbsnp.eval.called.all.novel.all
* @param dottedColumnValues Period concatenated values.
* @return The first primary key matching the column values or null.
*/
private Object findPrimaryKey(String dottedColumnValues) {
return findPrimaryKey(dottedColumnValues.split("\\."));
}
/**
* Returns the first primary key matching the column values.
* Ex: new String[] { "dbsnp", "eval", "called", "all", "novel", "all" }
* @param columnValues column values.
* @return The first primary key matching the column values.
*/
private Object findPrimaryKey(Object[] columnValues) {
for (Object primaryKey : primaryKeyColumn) {
boolean matching = true;
for (int i = 0; matching && i < columnValues.length; i++) {
matching = ObjectUtils.equals(columnValues[i], get(primaryKey, i+1));
}
if (matching)
return primaryKey;
}
return null;
}
/** /**
* Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set. * Add a column to the report and specify the default value that should be supplied if a given position in the table is never explicitly set.
* *
@ -230,6 +312,17 @@ public class GATKReportTable {
return columns.get(columnName).get(primaryKey); return columns.get(columnName).get(primaryKey);
} }
/**
* Get a value from the given position in the table
*
* @param primaryKey the primary key value
* @param columnIndex the index of the column
* @return the value stored at the specified position in the table
*/
private Object get(Object primaryKey, int columnIndex) {
return columns.getByIndex(columnIndex).get(primaryKey);
}
/** /**
* Increment an element in the table. This implementation is awful - a functor would probably be better. * Increment an element in the table. This implementation is awful - a functor would probably be better.
* *
@ -515,7 +608,7 @@ public class GATKReportTable {
String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s"; String primaryKeyFormat = "%-" + getPrimaryKeyColumnWidth() + "s";
// Emit the table definition // Emit the table definition
out.printf("##:GATKReport.v0.1 %s : %s%n", tableName, tableDescription); out.printf("##:GATKReport.%s %s : %s%n", LATEST_REPORT_VERSION.versionString, tableName, tableDescription);
// Emit the table header, taking into account the padding requirement if the primary key is a hidden column // Emit the table header, taking into account the padding requirement if the primary key is a hidden column
boolean needsPadding = false; boolean needsPadding = false;
@ -545,22 +638,8 @@ public class GATKReportTable {
for (String columnName : columns.keySet()) { for (String columnName : columns.keySet()) {
if (columns.get(columnName).isDisplayable()) { if (columns.get(columnName).isDisplayable()) {
Object obj = columns.get(columnName).getWithoutSideEffects(primaryKey);
if (needsPadding) { out.printf(" "); } if (needsPadding) { out.printf(" "); }
String value = columns.get(columnName).getStringValue(primaryKey);
String value = "null";
if (obj != null) {
if (obj instanceof Float) {
value = String.format("%.8f", (Float) obj);
} else if (obj instanceof Double) {
value = String.format("%.8f", (Double) obj);
} else {
value = obj.toString();
}
}
//out.printf(columnWidths.get(columnName), obj == null ? "null" : obj.toString());
out.printf(columnWidths.get(columnName), value); out.printf(columnWidths.get(columnName), value);
needsPadding = true; needsPadding = true;
@ -577,4 +656,16 @@ public class GATKReportTable {
public int getNumRows() { public int getNumRows() {
return primaryKeyColumn.size(); return primaryKeyColumn.size();
} }
public String getTableName() {
return tableName;
}
public String getTableDescription() {
return tableDescription;
}
public GATKReportColumns getColumns() {
return columns;
}
} }
View File
@ -1,75 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.apache.commons.lang.StringUtils;
import java.util.*;
public class GATKReportTableParser {
private int lineNum = 0;
private String[] descriptions;
private Map<String, Integer> headers = new HashMap<String, Integer>();
private List<String[]> values = new ArrayList<String[]>();
public void parse(String line) {
lineNum++;
switch (lineNum) {
case 1:
descriptions = parseLine(line);
case 2:
String[] columnHeaders = parseLine(line);
for (int i = 0; i < columnHeaders.length; i++)
headers.put(columnHeaders[i], i);
default:
values.add(parseLine(line));
}
}
public String getTableName() {
return descriptions[1];
}
public String getValue(String[] key, String column) {
if (!headers.containsKey(column))
return null;
for (String[] row: values)
if (Arrays.equals(key, Arrays.copyOfRange(row, 1, key.length + 1)))
return row[headers.get(column)];
return null;
}
public String getValue(String key, String column) {
return getValue(key.split("\\."), column);
}
private String generateKey(String[] row, int i) {
return StringUtils.join(row, ".", 0, i);
}
private String[] parseLine(String line) {
return line.split(" +");
}
}
View File
@ -0,0 +1,70 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
public enum GATKReportVersion {
/**
* Differences from other versions:
* - Does not allow spaces in cells.
* - Mostly fixed width but has a bug where the string width of floating point
* values was not measured correctly leading to columns that aren't aligned
*/
V0_1("v0.1"),
/**
* Differences from other versions:
* - Spaces allowed in cells, for example in sample names such as "C507/FG-CR 6".
* - Fixed-width formatting corrected for floating point values
*/
V0_2("v0.2");
public final String versionString;
private GATKReportVersion(String versionString) {
this.versionString = versionString;
}
@Override
public String toString() {
return versionString;
}
/**
* Returns the GATK Report Version from the file header.
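* <p>
* For example, a header line such as (the table name and description here are hypothetical)
* <pre>
*   ##:GATKReport.v0.2 CountVariants : Basic variant counts
* </pre>
* resolves to {@link #V0_2}.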
* @param header Header from the file starting with ##:GATKReport.v[version]
* @return The version as an enum.
*/
public static GATKReportVersion fromHeader(String header) {
if (header.startsWith("##:GATKReport.v0.1 "))
return GATKReportVersion.V0_1;
if (header.startsWith("##:GATKReport.v0.2 "))
return GATKReportVersion.V0_2;
throw new ReviewedStingException("Unknown GATK report version in header: " + header);
}
}
View File
@ -115,12 +115,13 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>(); LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
/** We use the SimpleTimer to time our run */ /** We use the SimpleTimer to time our run */
private SimpleTimer timer = new SimpleTimer("Traversal"); private SimpleTimer timer = null;
// How long can we go without printing some progress info? // How long can we go without printing some progress info?
private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000; private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
private int printProgressCheckCounter = 0; private int printProgressCheckCounter = 0;
private long lastProgressPrintTime = -1; // When was the last time we printed progress log? private long lastProgressPrintTime = -1; // When was the last time we printed progress log?
private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 120 * 1000; // in milliseconds
private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds private long PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0; private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0; private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
@ -209,11 +210,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
} }
} }
/** /**
* Should be called to indicate that we're going to process records and the timer should start ticking * Should be called to indicate that we're going to process records and the timer should start ticking. This
* function should be called right before any traversal work is done, to avoid counting setup costs in the
* processing costs and inflating the estimated runtime.
*/ */
public void startTimers() { public void startTimersIfNecessary() {
timer.start(); if ( timer == null ) {
lastProgressPrintTime = timer.currentTime(); timer = new SimpleTimer("Traversal");
timer.start();
lastProgressPrintTime = timer.currentTime();
}
} }
/** /**
@ -224,7 +230,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
* @return true if the maximum interval (in millisecs) has passed since the last printing * @return true if the maximum interval (in millisecs) has passed since the last printing
*/ */
private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) { private boolean maxElapsedIntervalForPrinting(final long curTime, long lastPrintTime, long printFreq) {
return (curTime - lastPrintTime) > printFreq; long elapsed = curTime - lastPrintTime;
return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
} }
/** /**
View File
@ -173,7 +173,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* -> those with the same mate pair position, for paired reads * -> those with the same mate pair position, for paired reads
* -> those flagged as unpaired and duplicated but having the same start and end * -> those flagged as unpaired and duplicated but having the same start and end
*/ */
boolean done = walker.isDone();
for (SAMRecord read : iter) { for (SAMRecord read : iter) {
if ( done ) break;
// get the genome loc from the read // get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
@ -194,6 +196,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
} }
printProgress(dataProvider.getShard(),site); printProgress(dataProvider.getShard(),site);
done = walker.isDone();
} }
return sum; return sum;
View File
@ -33,6 +33,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider)); logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
LocusView locusView = getLocusView( walker, dataProvider ); LocusView locusView = getLocusView( walker, dataProvider );
boolean done = false;
if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
@ -46,7 +47,7 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider ); LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
// We keep processing while the next reference location is within the interval // We keep processing while the next reference location is within the interval
while( locusView.hasNext() ) { while( locusView.hasNext() && ! done ) {
AlignmentContext locus = locusView.next(); AlignmentContext locus = locusView.next();
GenomeLoc location = locus.getLocation(); GenomeLoc location = locus.getLocation();
@ -65,26 +66,28 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
referenceView.expandBoundsToAccomodateLoc(location); referenceView.expandBoundsToAccomodateLoc(location);
} }
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation());
// create reference context. Note that if we have a pileup of "extended events", the context will // create reference context. Note that if we have a pileup of "extended events", the context will
// hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup). // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
ReferenceContext refContext = referenceView.getReferenceContext(location); ReferenceContext refContext = referenceView.getReferenceContext(location);
// Iterate forward to get all reference ordered data covering this location
final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
final boolean keepMeP = walker.filter(tracker, refContext, locus); final boolean keepMeP = walker.filter(tracker, refContext, locus);
if (keepMeP) { if (keepMeP) {
M x = walker.map(tracker, refContext, locus); M x = walker.map(tracker, refContext, locus);
sum = walker.reduce(x, sum); sum = walker.reduce(x, sum);
done = walker.isDone();
} }
printProgress(dataProvider.getShard(),locus.getLocation()); printProgress(dataProvider.getShard(),locus.getLocation());
} }
} }
// We have a final map call to execute here to clean up the skipped based from the // We have a final map call to execute here to clean up the skipped based from the
// last position in the ROD to that in the interval // last position in the ROD to that in the interval
if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA ) { if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
// only do this if the walker isn't done!
RodLocusView rodLocusView = (RodLocusView)locusView; RodLocusView rodLocusView = (RodLocusView)locusView;
long nSkipped = rodLocusView.getLastSkippedBases(); long nSkipped = rodLocusView.getLastSkippedBases();
if ( nSkipped > 0 ) { if ( nSkipped > 0 ) {
View File
@ -50,7 +50,9 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
ReadView reads = new ReadView(dataProvider); ReadView reads = new ReadView(dataProvider);
List<SAMRecord> pairs = new ArrayList<SAMRecord>(); List<SAMRecord> pairs = new ArrayList<SAMRecord>();
boolean done = walker.isDone();
for(SAMRecord read: reads) { for(SAMRecord read: reads) {
if ( done ) break;
dataProvider.getShard().getReadMetrics().incrementNumReadsSeen(); dataProvider.getShard().getReadMetrics().incrementNumReadsSeen();
if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) { if(pairs.size() == 0 || pairs.get(0).getReadName().equals(read.getReadName())) {
@ -65,6 +67,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
printProgress(dataProvider.getShard(),null); printProgress(dataProvider.getShard(),null);
} }
done = walker.isDone();
} }
// If any data was left in the queue, process it. // If any data was left in the queue, process it.
View File
@ -82,8 +82,10 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// get the reference ordered data // get the reference ordered data
ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider); ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
boolean done = walker.isDone();
// while we still have more reads // while we still have more reads
for (SAMRecord read : reads) { for (SAMRecord read : reads) {
if ( done ) break;
// ReferenceContext -- the reference bases covered by the read // ReferenceContext -- the reference bases covered by the read
ReferenceContext refContext = null; ReferenceContext refContext = null;
@ -106,6 +108,7 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart()); GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
printProgress(dataProvider.getShard(),locus); printProgress(dataProvider.getShard(),locus);
done = walker.isDone();
} }
return sum; return sum;
} }
View File
@ -23,5 +23,4 @@ import java.lang.annotation.*;
@Target(ElementType.TYPE) @Target(ElementType.TYPE)
public @interface Allows { public @interface Allows {
DataSource[] value(); DataSource[] value();
RMD[] referenceMetaData() default {};
} }
View File
@ -30,7 +30,9 @@ import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter; import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
@ -50,44 +52,158 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern; import java.util.regex.Pattern;
/** /**
* This ReadWalker provides simple, yet powerful read clipping capabilities. It allows the user to clip bases in reads * This tool provides simple, powerful read clipping capabilities to remove low quality strings of bases, sections of reads, and reads containing user-provided sequences.
* with poor quality scores, that match particular sequences, or that were generated by particular machine cycles. *
*
* <p>
* It allows the user to clip bases in reads with poor quality scores, that match particular
* sequences, or that were generated by particular machine cycles.
*
* <dl>
* <dt>Quality score based clipping</dt>
* <dd>
* Clip bases from the read in clipper from
* <br>argmax_x{ \sum_{i = x + 1}^{l} (qTrimmingThreshold - qual_i) }<br>
* to the end of the read. This is blatantly stolen from BWA.
*
* Walk through the read from the end (in machine cycle order) to the beginning, calculating the
* running sum of qTrimmingThreshold - qual. While we do this, we track the maximum value of this
* sum where the delta > 0. After the loop, clipPoint is either -1 (don't do anything) or the
* clipping index in the read (from the end).
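*
* A rough sketch of that loop (illustrative only; the variable names are not taken from the
* actual implementation):
* <pre>
*   int clipPoint = -1, runningSum = 0, maxSum = 0;
*   // walk from the last base (machine-cycle order) back towards the first
*   for (int i = quals.length - 1; i >= 0; i--) {
*       runningSum += qTrimmingThreshold - quals[i];
*       if (runningSum > maxSum) {   // only track positive deltas
*           maxSum = runningSum;
*           clipPoint = i;           // clip from here to the end of the read
*       }
*   }
* </pre>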
* </dd>
* <dt>Cycle based clipping</dt>
* <dd>Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based values (positions).
* For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11, and 12.
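*
* A parsing sketch for that range format (illustrative only, not the actual implementation):
* <pre>
*   for (String range : cyclesToClipArg.split(",")) {
*       String[] ends = range.split("-");
*       int start = Integer.parseInt(ends[0]);   // 1-based, inclusive
*       int stop  = Integer.parseInt(ends[1]);   // 1-based, inclusive
*       // record (start, stop) as a machine-cycle range to clip
*   }
* </pre>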
* </dd>
* <dt>Sequence matching</dt>
* <dd>Clips bases from the read that exactly match one of a number of base sequences. This employs an exact match algorithm,
* filtering only bases whose sequence exactly matches SEQ.</dd>
* </dl>
*
* </p>
*
* <h2>Input</h2>
* <p>
* Any number of BAM files.
* </p>
*
* <h2>Output</h2>
* <p>
* A new BAM file containing all of the reads from the input BAMs with the user-specified clipping
* operation applied to each read.
* </p>
* <p>
* <h3>Summary output</h3>
* <pre>
* Number of examined reads 13
* Number of clipped reads 13
* Percent of clipped reads 100.00
* Number of examined bases 988
* Number of clipped bases 126
* Percent of clipped bases 12.75
* Number of quality-score clipped bases 126
* Number of range clipped bases 0
* Number of sequence clipped bases 0
* </pre>
* </p>
*
* <p>
* <h3>Example clipping</h3>
* Suppose we are given this read:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* If we are clipping reads with -QT 10 and -CR WRITE_NS, we get:
*
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* NNNNNNNNNNNNNNNNNTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* Whereas with -CR WRITE_Q0S:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3116 29 76M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* !!!!!!!!!!!!!!!!!4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
*
* Or -CR SOFTCLIP_BASES:
* <pre>
* 314KGAAXX090507:1:19:1420:1123#0 16 chrM 3133 29 17S59M * * *
* TAGGACCCGGGCCCCCCTCCCCAATCCTCCAACGCATATAGCGGCCGCGCCTTCCCCCGTAAATGATATCATCTCA
* #################4?6/?2135;;;'1/=/<'B9;12;68?A79@,@==@9?=AAA3;A@B;A?B54;?ABA
* </pre>
* </p>
*
* <h2>Examples</h2>
* <pre>
* -T ClipReads -I my.bam -I your.bam -o my_and_your.clipped.bam -R Homo_sapiens_assembly18.fasta \
* -XF seqsToClip.fasta -X CCCCC -CT "1-5,11-15" -QT 10
* </pre>
* @author Mark DePristo
* @since 2010
*/ */
@Requires({DataSource.READS}) @Requires({DataSource.READS})
public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.ClippingData> { public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithData, ClipReadsWalker.ClippingData> {
@Output /**
PrintStream out; * If provided, ClipReads will write summary statistics about the clipping operations applied
* to the reads to this file.
*/
@Output(fullName = "outputStatistics", shortName = "os", doc = "Write output statistics to this file", required = false)
PrintStream out = null;
/** /**
* an optional argument to dump the reads out to a BAM file * The output SAM/BAM file will be written here
*/ */
@Argument(fullName = "outputBam", shortName = "ob", doc = "Write output to this BAM filename instead of STDOUT", required = false) @Output(doc = "Write BAM output here", required = true)
StingSAMFileWriter outputBam = null; StingSAMFileWriter outputBam;
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "", required = false) /**
* If a value > 0 is provided, then the quality score based read clipper will be applied to the reads using this
* quality score threshold.
*/
@Argument(fullName = "qTrimmingThreshold", shortName = "QT", doc = "If provided, the Q-score clipper will be applied", required = false)
int qTrimmingThreshold = -1; int qTrimmingThreshold = -1;
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String of the form 1-10,20-30 indicating machine cycles to clip from the reads", required = false) /**
* Clips machine cycles from the read. Accepts a string of ranges of the form start1-end1,start2-end2, etc.
* For each start/end pair, removes bases in machine cycles from start to end, inclusive. These are 1-based
* values (positions). For example, 1-5,10-12 clips the first 5 bases, and then three bases at cycles 10, 11,
* and 12.
*/
@Argument(fullName = "cyclesToTrim", shortName = "CT", doc = "String indicating machine cycles to clip from the reads", required = false)
String cyclesToClipArg = null; String cyclesToClipArg = null;
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching these sequences", required = false) /**
* Reads the sequences in the provided FASTA file, and clips any bases that exactly match any of the
* sequences in the file.
*/
@Argument(fullName = "clipSequencesFile", shortName = "XF", doc = "Remove sequences within reads matching the sequences in this FASTA file", required = false)
String clipSequenceFile = null; String clipSequenceFile = null;
/**
* Clips bases from the reads matching the provided sequence (SEQ). This argument can be provided any number of times on the command line.
*/
@Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false) @Argument(fullName = "clipSequence", shortName = "X", doc = "Remove sequences within reads matching this sequence", required = false)
String[] clipSequencesArgs = null; String[] clipSequencesArgs = null;
@Argument(fullName="read", doc="", required=false) /**
String onlyDoRead = null; * The different values for this argument determine how ClipReads applies clips to the reads. This can range
* from writing Ns over the clipped bases to hard clipping away the bases from the BAM.
//@Argument(fullName = "keepCompletelyClipped", shortName = "KCC", doc = "Unfortunately, sometimes a read is completely clipped away but with SOFTCLIP_BASES this results in an invalid CIGAR string. ", required = false) */
//boolean keepCompletelyClippedReads = false;
// @Argument(fullName = "onlyClipFirstSeqMatch", shortName = "ESC", doc="Only clip the first occurrence of a clipping sequence, rather than all subsequences within a read that match", required = false)
// boolean onlyClipFirstSeqMatch = false;
@Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false) @Argument(fullName = "clipRepresentation", shortName = "CR", doc = "How should we actually clip the bases?", required = false)
ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS; ClippingRepresentation clippingRepresentation = ClippingRepresentation.WRITE_NS;
@Hidden
@Advanced
@Argument(fullName="read", doc="", required=false)
String onlyDoRead = null;
/** /**
* List of sequence that should be clipped from the reads * List of sequence that should be clipped from the reads
@ -180,12 +296,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return the ReadClipper object describing what should be done to clip this read * @return the ReadClipper object describing what should be done to clip this read
*/ */
public ReadClipper map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) { if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) { if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read); read = ReadUtils.replaceSoftClipsWithMatches(read);
} }
ReadClipper clipper = new ReadClipper(read); ReadClipperWithData clipper = new ReadClipperWithData(read, sequencesToClip);
// //
// run all three clipping modules // run all three clipping modules
@ -205,9 +321,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
* *
* @param clipper * @param clipper
*/ */
private void clipSequences(ReadClipper clipper) { private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead(); SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) { for (SeqToClip stc : sequencesToClip) {
// we have a pattern for both the forward and the reverse strands // we have a pattern for both the forward and the reverse strands
@ -223,11 +340,14 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
if (found) { if (found) {
int start = match.start(); int start = match.start();
int stop = match.end() - 1; int stop = match.end() - 1;
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq); //ClippingOp op = new ClippingOp(ClippingOp.ClippingType.MATCHES_CLIP_SEQ, start, stop, stc.seq);
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op); clipper.addOp(op);
data.incSeqClippedBases(stc.seq, op.getLength());
} }
} }
} }
clipper.setData(data);
} }
} }
@ -252,9 +372,10 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
* *
* @param clipper * @param clipper
*/ */
private void clipCycles(ReadClipper clipper) { private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) { if (cyclesToClip != null) {
SAMRecord read = clipper.getRead(); SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
int cycleStart = p.first; int cycleStart = p.first;
@ -270,10 +391,13 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
int start = startStop.first; int start = startStop.first;
int stop = startStop.second; int stop = startStop.second;
ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null); //ClippingOp op = new ClippingOp(ClippingOp.ClippingType.WITHIN_CLIP_RANGE, start, stop, null);
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op); clipper.addOp(op);
data.incNRangeClippedBases(op.getLength());
} }
} }
clipper.setData(data);
} }
} }
@ -291,8 +415,9 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
* *
* @param clipper * @param clipper
*/ */
private void clipBadQualityScores(ReadClipper clipper) { private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead(); SAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
int readLen = read.getReadBases().length; int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();
@ -311,8 +436,12 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
if (clipPoint != -1) { if (clipPoint != -1) {
int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint; int start = read.getReadNegativeStrandFlag() ? 0 : clipPoint;
int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1; int stop = read.getReadNegativeStrandFlag() ? clipPoint : readLen - 1;
clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null)); //clipper.addOp(new ClippingOp(ClippingOp.ClippingType.LOW_Q_SCORES, start, stop, null));
ClippingOp op = new ClippingOp(start, stop);
clipper.addOp(op);
data.incNQClippedBases(op.getLength());
} }
clipper.setData(data);
} }
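All three clipping modules above now share the same bookkeeping: pull the ClippingData off the clipper, record a plain ClippingOp (the per-type enum is gone), bump the matching counter, and write the data back. A minimal sketch of that pattern, with start/stop as placeholders:

ClippingData data = clipper.getData();
ClippingOp op = new ClippingOp(start, stop);        // no ClippingType argument any more
clipper.addOp(op);
data.incNQClippedBases(op.getLength());             // or incNRangeClippedBases(...) / incSeqClippedBases(seq, ...)
clipper.setData(data);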
/** /**
@ -325,7 +454,7 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
return new ClippingData(sequencesToClip); return new ClippingData(sequencesToClip);
} }
public ClippingData reduce(ReadClipper clipper, ClippingData data) { public ClippingData reduce(ReadClipperWithData clipper, ClippingData data) {
if ( clipper == null ) if ( clipper == null )
return data; return data;
@ -340,23 +469,8 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
data.nTotalBases += clipper.getRead().getReadLength(); data.nTotalBases += clipper.getRead().getReadLength();
if (clipper.wasClipped()) { if (clipper.wasClipped()) {
data.nClippedReads++; data.nClippedReads++;
for (ClippingOp op : clipper.getOps()) { data.addData(clipper.getData());
switch (op.type) {
case LOW_Q_SCORES:
data.incNQClippedBases(op.getLength());
break;
case WITHIN_CLIP_RANGE:
data.incNRangeClippedBases(op.getLength());
break;
case MATCHES_CLIP_SEQ:
data.incSeqClippedBases((String) op.extraInfo, op.getLength());
break;
default:
throw new IllegalStateException("Unexpected Clipping operator type " + op);
}
}
} }
return data; return data;
} }
@ -417,6 +531,23 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
seqClipCounts.put(seq, seqClipCounts.get(seq) + n); seqClipCounts.put(seq, seqClipCounts.get(seq) + n);
} }
public void addData (ClippingData data) {
nTotalReads += data.nTotalReads;
nTotalBases += data.nTotalBases;
nClippedReads += data.nClippedReads;
nClippedBases += data.nClippedBases;
nQClippedBases += data.nQClippedBases;
nRangeClippedBases += data.nRangeClippedBases;
nSeqClippedBases += data.nSeqClippedBases;
for (String seqClip : data.seqClipCounts.keySet()) {
Long count = data.seqClipCounts.get(seqClip);
if (seqClipCounts.containsKey(seqClip))
count += seqClipCounts.get(seqClip);
seqClipCounts.put(seqClip, count);
}
}
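Because addData simply sums every counter and merges the per-sequence map, partial ClippingData objects can be combined in any order; a minimal illustration, assuming two independently accumulated instances:

ClippingData a = new ClippingData(sequencesToClip);   // accumulated over one batch of reads
ClippingData b = new ClippingData(sequencesToClip);   // accumulated over another batch
a.addData(b);                                         // a now holds the combined totals, including merged seqClipCounts entries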
public String toString() { public String toString() {
StringBuilder s = new StringBuilder(); StringBuilder s = new StringBuilder();
@ -439,4 +570,27 @@ public class ClipReadsWalker extends ReadWalker<ReadClipper, ClipReadsWalker.Cli
return s.toString(); return s.toString();
} }
} }
public class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
super(read);
data = new ClippingData(clipSeqs);
}
public ClippingData getData() {
return data;
}
public void setData(ClippingData data) {
this.data = data;
}
public void addData(ClippingData data) {
this.data.addData(data);
}
}
} }


@ -2,7 +2,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
@ -17,7 +17,7 @@ import java.util.Set;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@Requires({DataSource.READS,DataSource.REFERENCE}) @Requires({DataSource.READS,DataSource.REFERENCE})
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) { public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {


@ -3,8 +3,8 @@ package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter; import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckReadFilter; import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentReadFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -18,7 +18,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@By(DataSource.READS) @By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL) @PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentReadFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckReadFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) { public boolean filter(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {


@ -25,15 +25,14 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.dbsnp.DbSNPFeature; import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.refdata.utils.helpers.DbSNPHelper;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -41,6 +40,7 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Collections;
import java.util.List; import java.util.List;
/** /**
@ -68,6 +68,9 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events") @Argument(fullName="showIndelPileups",shortName="show_indels",doc="In addition to base pileups, generate pileups of extended indel events")
public boolean SHOW_INDEL_PILEUPS = false; public boolean SHOW_INDEL_PILEUPS = false;
@Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
public List<RodBinding<Feature>> rods = Collections.emptyList();
public void initialize() { public void initialize() {
} }
@ -112,18 +115,11 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
*/ */
private String getReferenceOrderedData( RefMetaDataTracker tracker ) { private String getReferenceOrderedData( RefMetaDataTracker tracker ) {
ArrayList<String> rodStrings = new ArrayList<String>(); ArrayList<String> rodStrings = new ArrayList<String>();
for ( GATKFeature datum : tracker.getAllRods() ) { for ( Feature datum : tracker.getValues(rods) ) {
if ( datum != null && datum.getUnderlyingObject() instanceof ReferenceOrderedDatum ) { rodStrings.add(datum.toString());
rodStrings.add(((ReferenceOrderedDatum)datum.getUnderlyingObject()).toSimpleString()); // TODO: Aaron: this line still survives, try to remove it
}
} }
String rodString = Utils.join(", ", rodStrings); String rodString = Utils.join(", ", rodStrings);
DbSNPFeature dbsnp = tracker.lookup(DbSNPHelper.STANDARD_DBSNP_TRACK_NAME, DbSNPFeature.class);
if ( dbsnp != null)
rodString += DbSNPHelper.toMediumString(dbsnp);
if ( !rodString.equals("") ) if ( !rodString.equals("") )
rodString = "[ROD: " + rodString + "]"; rodString = "[ROD: " + rodString + "]";
@ -132,8 +128,6 @@ public class PileupWalker extends LocusWalker<Integer, Integer> implements TreeR
@Override @Override
public void onTraversalDone(Integer result) { public void onTraversalDone(Integer result) {
// Double check traversal result to make count is the same.
// TODO: Is this check necessary?
out.println("[REDUCE RESULT] Traversal result is: " + result); out.println("[REDUCE RESULT] Traversal result is: " + result);
} }
} }


@ -25,21 +25,24 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Iterator;
/** /**
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method * Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD. * of the given ROD.
*/ */
public class PrintRODsWalker extends RodWalker<Integer, Integer> { public class PrintRODsWalker extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
public RodBinding<Feature> input;
@Output @Output
PrintStream out; PrintStream out;
@ -61,11 +64,8 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
if ( tracker == null ) if ( tracker == null )
return 0; return 0;
Iterator<GATKFeature> rods = tracker.getAllRods().iterator(); for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
while ( rods.hasNext() ) { out.println(feature.toString());
Object rod = rods.next().getUnderlyingObject();
if (VariantContextAdaptors.canBeConvertedToVariantContext(rod) )
out.println(rod.toString());
} }
return 1; return 1;
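The class-based overload used here complements the binding-based lookup above: tracker.getValues(Feature.class, context.getLocation()) returns every Feature at the current location regardless of which track supplied it, so no @Input binding is required. A minimal sketch with a purely illustrative filter:

for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
    if ( feature.getEnd() - feature.getStart() >= 10 )   // e.g. report only longer records
        out.println(feature.toString());
}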


@ -40,26 +40,72 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
/** /**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
* in the input file. It can dynamically merge the contents of multiple input BAM files, resulting *
* in merged output sorted in coordinate order. Can also optionally filter reads based on the --read-filter * <p>
* command line argument. * PrintReads can dynamically merge the contents of multiple input BAM files, resulting
* in merged output sorted in coordinate order. Can also optionally filter reads based on the
* --read_filter command line argument.
*
* <h2>Input</h2>
* <p>
* One or more bam files.
* </p>
*
* <h2>Output</h2>
* <p>
* A single processed bam file.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
*
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
* </pre>
*
*/ */
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT) @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = BAQ.ApplicationTime.ON_OUTPUT)
@Requires({DataSource.READS, DataSource.REFERENCE}) @Requires({DataSource.READS, DataSource.REFERENCE})
public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> { public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** an optional argument to dump the reads out to a BAM file */
@Output(doc="Write output to this BAM filename instead of STDOUT") @Output(doc="Write output to this BAM filename instead of STDOUT")
SAMFileWriter out; SAMFileWriter out;
@Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false) @Argument(fullName = "readGroup", shortName = "readGroup", doc="Exclude all reads with this read group from the output", required = false)
String readGroup = null; String readGroup = null;
/**
* For example, --platform ILLUMINA or --platform 454.
*/
@Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false) @Argument(fullName = "platform", shortName = "platform", doc="Exclude all reads with this platform from the output", required = false)
String platform = null; // E.g. ILLUMINA, 454 String platform = null;
@Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false) @Argument(fullName = "number", shortName = "n", doc="Print the first n reads from the file, discarding the rest", required = false)
int nReadsToPrint = -1; int nReadsToPrint = -1;
/**
* Only reads from samples listed in the provided file(s) will be included in the output.
*/
@Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false) @Argument(fullName="sample_file", shortName="sf", doc="File containing a list of samples (one per line). Can be specified multiple times", required=false)
public Set<File> sampleFile = new TreeSet<File>(); public Set<File> sampleFile = new TreeSet<File>();
/**
* Only reads from the specified sample(s) will be included in the output.
*/
@Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false) @Argument(fullName="sample_name", shortName="sn", doc="Sample name to be included in the analysis. Can be specified multiple times.", required=false)
public Set<String> sampleNames = new TreeSet<String>(); public Set<String> sampleNames = new TreeSet<String>();
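In the same style as the examples in the class documentation above, restricting the output to a single sample might look like this (reference, BAM, and sample names are placeholders):

java -Xmx2g -jar GenomeAnalysisTK.jar \
   -R ref.fasta \
   -T PrintReads \
   -o NA12878.bam \
   -I cohort.bam \
   -sn NA12878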


@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.filter.SamRecordFilter; import net.sf.picard.filter.SamRecordFilter;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.lang.annotation.*; import java.lang.annotation.*;


@ -26,11 +26,14 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List; import java.util.List;
@ -44,6 +47,10 @@ import java.util.List;
@ReadFilters(MalformedReadFilter.class) @ReadFilters(MalformedReadFilter.class)
@PartitionBy(PartitionType.NONE) @PartitionBy(PartitionType.NONE)
@BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT) @BAQMode(QualityMode = BAQ.QualityMode.OVERWRITE_QUALS, ApplicationTime = BAQ.ApplicationTime.ON_INPUT)
@DocumentedGATKFeature(
groupName = "GATK walkers",
summary = "General tools available for running on the command line as part of the GATK package",
extraDocs = {CommandLineGATK.class})
public abstract class Walker<MapType, ReduceType> { public abstract class Walker<MapType, ReduceType> {
final protected static Logger logger = Logger.getLogger(Walker.class); final protected static Logger logger = Logger.getLogger(Walker.class);
private GenomeAnalysisEngine toolkit; private GenomeAnalysisEngine toolkit;
@ -119,6 +126,17 @@ public abstract class Walker<MapType, ReduceType> {
public void initialize() { } public void initialize() { }
/**
* A method for subclasses to override, providing a mechanism to abort a walker early.
*
* If this ever returns true, then the Traversal engine will stop executing map calls
* and start the process of shutting down the walker in an orderly fashion.
* @return true if the traversal should stop making map calls, false otherwise
*/
public boolean isDone() {
return false;
}
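A minimal sketch of a subclass using the new hook; the walker, its fields, and the read cap are hypothetical, and the traversal signatures follow the ReadWalker used elsewhere in this change:

// hypothetical walker that stops the traversal after a fixed number of reads
public class FirstNReadsWalker extends ReadWalker<SAMRecord, Integer> {
    private long seen = 0;
    private static final long MAX_READS = 1000000;   // illustrative cap

    public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
        seen++;
        return read;
    }

    public Integer reduceInit() { return 0; }

    public Integer reduce(SAMRecord read, Integer count) { return count + 1; }

    @Override
    public boolean isDone() {
        return seen >= MAX_READS;   // once true, the engine stops issuing map() calls
    }
}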
/** /**
* Provide an initial value for reduce computations. * Provide an initial value for reduce computations.
* @return Initial value of reduce. * @return Initial value of reduce.


@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -42,9 +43,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class AlleleBalance implements InfoFieldAnnotation { public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 ) if ( stratifiedContexts.size() == 0 )
return null; return null;
@ -89,7 +90,7 @@ public class AlleleBalance implements InfoFieldAnnotation {
} }
// todo -- actually care about indel length from the pileup (agnostic at the moment) // todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size(); int refCount = indelPileup.size();
int altCount = vc.isInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) { if ( refCount + altCount == 0 ) {
continue; continue;
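The same change repeats through the annotator package below: InfoFieldAnnotation (and GenotypeAnnotation) become base classes to extend, and annotate() now receives the calling AnnotatorCompatibleWalker. A minimal sketch of the new shape for an info-field annotation; the class name, key, and emitted value are hypothetical, and getDescriptions() is assumed from the header-line fields the other annotators keep:

public class SampleCount extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
        if ( stratifiedContexts.size() == 0 )
            return null;
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), stratifiedContexts.size());   // number of samples with pileup data
        return map;
    }

    public List<String> getKeyNames() { return Arrays.asList("NSAMPLES"); }

    public List<VCFInfoHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFInfoHeaderLine("NSAMPLES", 1, VCFHeaderLineType.Integer, "Number of samples with pileup data"));
    }
}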


@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
@ -15,9 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
public class AlleleBalanceBySample implements GenotypeAnnotation, ExperimentalAnnotation { public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
Double ratio = annotateSNP(stratifiedContext, vc, g); Double ratio = annotateSNP(stratifiedContext, vc, g);
if (ratio == null) if (ratio == null)
return null; return null;


@ -8,7 +8,7 @@ import java.util.Map;
public abstract class AnnotationByDepth implements InfoFieldAnnotation { public abstract class AnnotationByDepth extends InfoFieldAnnotation {
protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) { protected int annotationByVariantDepth(final Map<String, Genotype> genotypes, Map<String, AlignmentContext> stratifiedContexts) {


@ -34,6 +34,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
@ -46,9 +47,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class BaseCounts implements InfoFieldAnnotation { public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 ) if ( stratifiedContexts.size() == 0 )
return null; return null;


@ -28,6 +28,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -43,14 +44,14 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class ChromosomeCounts implements InfoFieldAnnotation, StandardAnnotation { public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY }; private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"), private VCFInfoHeaderLine[] descriptions = { new VCFInfoHeaderLine(VCFConstants.ALLELE_FREQUENCY_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Float, "Allele Frequency, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"), new VCFInfoHeaderLine(VCFConstants.ALLELE_COUNT_KEY, VCFHeaderLineCount.A, VCFHeaderLineType.Integer, "Allele count in genotypes, for each ALT allele, in the same order as listed"),
new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") }; new VCFInfoHeaderLine(VCFConstants.ALLELE_NUMBER_KEY, 1, VCFHeaderLineType.Integer, "Total number of alleles in called genotypes") };
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.hasGenotypes() ) if ( ! vc.hasGenotypes() )
return null; return null;


@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
@ -16,9 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class DepthOfCoverage implements InfoFieldAnnotation, StandardAnnotation { public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( stratifiedContexts.size() == 0 ) if ( stratifiedContexts.size() == 0 )
return null; return null;


@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine; import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
@ -22,13 +23,13 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class DepthPerAlleleBySample implements GenotypeAnnotation, StandardAnnotation { public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF"; private static String REF_ALLELE = "REF";
private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
if ( g == null || !g.isCalled() ) if ( g == null || !g.isCalled() )
return null; return null;


@ -28,6 +28,7 @@ import cern.jet.math.Arithmetic;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
@ -42,11 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
public class FisherStrand implements InfoFieldAnnotation, StandardAnnotation { public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS"; private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320; private static final double MIN_PVALUE = 1E-320;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if ( ! vc.isVariant() || vc.isFiltered() ) if ( ! vc.isVariant() || vc.isFiltered() )
return null; return null;


@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
@ -16,9 +17,9 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class GCContent implements InfoFieldAnnotation, ExperimentalAnnotation { public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
double content = computeGCContent(ref); double content = computeGCContent(ref);
Map<String, Object> map = new HashMap<String, Object>(); Map<String, Object> map = new HashMap<String, Object>();
map.put(getKeyNames().get(0), String.format("%.2f", content)); map.put(getKeyNames().get(0), String.format("%.2f", content));


@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
@ -23,11 +24,11 @@ import java.util.Map;
*/ */
// A set of annotations calculated directly from the GLs // A set of annotations calculated directly from the GLs
public class GLstats implements InfoFieldAnnotation, StandardAnnotation { public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes(); final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) if ( genotypes == null || genotypes.size() < MIN_SAMPLES )


@ -29,6 +29,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils; import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel; import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
@ -48,13 +49,13 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*; import java.util.*;
public class HaplotypeScore implements InfoFieldAnnotation, StandardAnnotation { public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false; private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10; private final static int MIN_CONTEXT_WING_SIZE = 10;
private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50; private final static int MAX_CONSENSUS_HAPLOTYPES_TO_CONSIDER = 50;
private final static char REGEXP_WILDCARD = '.'; private final static char REGEXP_WILDCARD = '.';
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here if (stratifiedContexts.size() == 0 ) // size 0 means that call was made by someone else and we have no data here
return null; return null;


@ -4,6 +4,7 @@ import org.broad.tribble.util.popgen.HardyWeinbergCalculation;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.WorkInProgressAnnotation;
import org.broadinstitute.sting.utils.QualityUtils; import org.broadinstitute.sting.utils.QualityUtils;
@ -18,13 +19,13 @@ import java.util.List;
import java.util.Map; import java.util.Map;
public class HardyWeinberg implements InfoFieldAnnotation, WorkInProgressAnnotation { public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10; private static final int MIN_SAMPLES = 10;
private static final int MIN_GENOTYPE_QUALITY = 10; private static final int MIN_GENOTYPE_QUALITY = 10;
private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10; private static final int MIN_NEG_LOG10_PERROR = MIN_GENOTYPE_QUALITY / 10;
public Map<String, Object> annotate(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) { public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
final Map<String, Genotype> genotypes = vc.getGenotypes(); final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() < MIN_SAMPLES ) if ( genotypes == null || genotypes.size() < MIN_SAMPLES )
