Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/stable

This commit is contained in:
Matt Hanna 2011-11-16 09:57:13 -05:00
commit 6a5d5e7ac9
375 changed files with 13496 additions and 9533 deletions

20
.gitignore vendored 100644
View File

@ -0,0 +1,20 @@
/*.bam
/*.bai
/*.bed
*.idx
*~
/*.vcf
/*.txt
/*.csh
/.*
/*.pdf
/*.eval
*.ipr
*.iws
*.iml
.DS_Store
queueScatterGather
/foo*
/bar*
integrationtests/
public/testdata/onTheFlyOutputTest.vcf

156
build.xml
View File

@ -28,6 +28,8 @@
<property name="build.dir" value="build" /> <property name="build.dir" value="build" />
<property name="dist.dir" value="dist" /> <property name="dist.dir" value="dist" />
<property name="contract.dump.dir" value="dump" />
<property name="pipelinetest.dir" value="pipelinetests" />
<property name="lib.dir" value="lib" /> <property name="lib.dir" value="lib" />
<property name="external.dir" value="external" /> <property name="external.dir" value="external" />
<property name="public.dir" value="public" /> <property name="public.dir" value="public" />
@ -35,18 +37,26 @@
<property name="java.public.source.dir" value="${public.dir}/java/src" /> <property name="java.public.source.dir" value="${public.dir}/java/src" />
<property name="java.private.source.dir" value="${private.dir}/java/src" /> <property name="java.private.source.dir" value="${private.dir}/java/src" />
<property name="java.classes" value="${build.dir}/java/classes" /> <property name="java.classes" value="${build.dir}/java/classes" />
<property name="R.public.scripts.dir" value="${public.dir}/R/scripts" />
<property name="R.private.scripts.dir" value="${private.dir}/R/scripts" />
<property name="R.public.src.dir" value="${public.dir}/R/src" />
<!-- Legacy: Installing libraries back into the source directory
instead of the build or dist directory... intentionally avoids ant clean?? -->
<property name="R.library.dir" value="${public.dir}/R" />
<property name="R.tar.dir" value="${build.dir}/R/src" />
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
<property name="resource.file" value="StingText.properties" /> <property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" /> <property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="scala.public.source.dir" value="${public.dir}/scala/src" /> <property name="scala.public.source.dir" value="${public.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" /> <property name="scala.private.source.dir" value="${private.dir}/scala/src" />
<property name="scala.classes" value="${build.dir}/scala/classes" /> <property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" /> <property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
<property name="javadoc.dir" value="javadoc" /> <property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" /> <property name="scaladoc.dir" value="scaladoc" />
<!-- Contracts for Java --> <!-- Contracts for Java -->
<!-- By default, enabled only for test targets --> <!-- By default, enabled only for test targets -->
<!-- To disable for test targets, run with -Duse.contracts=false --> <!-- To disable for test targets, run with -Duse.contracts=false -->
@ -60,7 +70,7 @@
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') --> <!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" /> <property name="halt" value="yes" />
<!-- should our unit test output go to a file or the screen? <!-- should our unit test output go to a file or the screen?
false means it goes to the screen (default) true to file --> false means it goes to the screen (default) true to file -->
<property name="usefile" value="false" /> <property name="usefile" value="false" />
@ -82,7 +92,7 @@
<patternset refid="java.source.pattern" /> <patternset refid="java.source.pattern" />
</fileset> </fileset>
<!-- terrible hack to get gatkdocs to see all files --> <!-- terrible hack to get gatkdocs to see all files -->
<patternset id="all.java.source.pattern"> <patternset id="all.java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" /> <include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" /> <include name="${java.private.source.dir}/**/*.java" />
@ -113,7 +123,7 @@
<exclude name="testng*.jar" /> <exclude name="testng*.jar" />
<exclude name="bcel*.jar" /> <exclude name="bcel*.jar" />
</patternset> </patternset>
<path id="external.dependencies"> <path id="external.dependencies">
<fileset dir="${lib.dir}"> <fileset dir="${lib.dir}">
<patternset refid="dependency.mask" /> <patternset refid="dependency.mask" />
@ -154,16 +164,18 @@
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/> <property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
<property name="ivy.settings.dir" value="settings"/> <property name="ivy.settings.dir" value="settings"/>
<property file="${ivy.settings.dir}/ivysettings.properties"/> <property file="${ivy.settings.dir}/ivysettings.properties"/>
<mkdir dir="${lib.dir}"/> <mkdir dir="${lib.dir}"/>
<mkdir dir="${ivy.jar.dir}"/> <mkdir dir="${ivy.jar.dir}"/>
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}" <get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}" dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/> usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml" <taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant" uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/> classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/> <ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
<property name="init.resolve.done" value="true"/> <property name="init.resolve.done" value="true"/>
</target> </target>
@ -209,11 +221,11 @@
<equals arg1="${git.describe.exit.value}" arg2="0" /> <equals arg1="${git.describe.exit.value}" arg2="0" />
</condition> </condition>
</target> </target>
<target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded"> <target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded">
<property name="build.version" value="${git.describe.output}" /> <property name="build.version" value="${git.describe.output}" />
</target> </target>
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded"> <target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false"> <exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
<arg line="rev-parse HEAD" /> <arg line="rev-parse HEAD" />
@ -222,11 +234,11 @@
<equals arg1="${git.rev-parse.exit.value}" arg2="0" /> <equals arg1="${git.rev-parse.exit.value}" arg2="0" />
</condition> </condition>
</target> </target>
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded"> <target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
<property name="build.version" value="${git.rev-parse.output}" /> <property name="build.version" value="${git.rev-parse.output}" />
</target> </target>
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version"> <target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
<!-- Set build.version to exported if no other value has been set --> <!-- Set build.version to exported if no other value has been set -->
<property name="build.version" value="exported" /> <property name="build.version" value="exported" />
@ -264,7 +276,7 @@
<echo message="Scala build : ${scala.target}"/> <echo message="Scala build : ${scala.target}"/>
<echo message="source revision : ${build.version}"/> <echo message="source revision : ${build.version}"/>
<echo message="build time : ${build.timestamp}" /> <echo message="build time : ${build.timestamp}" />
<condition property="include.private"> <condition property="include.private">
<equals arg1="${gatk.target}" arg2="private" casesensitive="false" /> <equals arg1="${gatk.target}" arg2="private" casesensitive="false" />
</condition> </condition>
@ -310,13 +322,13 @@
<target name="gatk.compile.public.source" depends="init,resolve"> <target name="gatk.compile.public.source" depends="init,resolve">
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}"> <javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
</javac> </javac>
</target> </target>
<target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private"> <target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private">
<javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}"> <javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
</javac> </javac>
</target> </target>
<target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source"> <target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source">
@ -325,11 +337,11 @@
<property name="dist.dir" value="${external.dist.dir}" /> <property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" /> <property name="gatk.classpath" value="${external.gatk.classpath}" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>
<target name="gatk.compile.source" <target name="gatk.compile.source"
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
description="compile the GATK source" /> description="compile the GATK source" />
<target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts"> <target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts">
@ -339,9 +351,9 @@
<pathelement path="${java.classes}" /> <pathelement path="${java.classes}" />
</classpath> </classpath>
<compilerarg value="-Acom.google.java.contract.debug"/> <compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/> <compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/> <compilerarg value="-proc:only"/>
</javac> </javac>
</target> </target>
<target name="check.contracts.private" depends="gatk.contracts.public"> <target name="check.contracts.private" depends="gatk.contracts.public">
@ -360,14 +372,14 @@
<pathelement path="${java.classes}" /> <pathelement path="${java.classes}" />
</classpath> </classpath>
<compilerarg value="-Acom.google.java.contract.debug"/> <compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/> <compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/> <compilerarg value="-proc:only"/>
</javac> </javac>
</target> </target>
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private" <target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
description="create GATK contracts" if="include.contracts" /> description="create GATK contracts" if="include.contracts" />
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" /> <target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
<target name="init.queue-extensions.generate" depends="gatk.compile"> <target name="init.queue-extensions.generate" depends="gatk.compile">
@ -411,9 +423,9 @@
<src path="${scala.public.source.dir}" /> <src path="${scala.public.source.dir}" />
<src path="${queue-extensions.source.dir}" /> <src path="${queue-extensions.source.dir}" />
<include name="**/*.scala"/> <include name="**/*.scala"/>
</scalac> </scalac>
</target> </target>
<target name="check.scala.private" depends="scala.compile.public"> <target name="check.scala.private" depends="scala.compile.public">
<condition property="include.scala.private"> <condition property="include.scala.private">
<and> <and>
@ -422,12 +434,12 @@
</and> </and>
</condition> </condition>
</target> </target>
<target name="scala.compile.private" depends="check.scala.private" if="include.scala.private"> <target name="scala.compile.private" depends="check.scala.private" if="include.scala.private">
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes"> <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.private.source.dir}" /> <src path="${scala.private.source.dir}" />
<include name="**/*.scala"/> <include name="**/*.scala"/>
</scalac> </scalac>
</target> </target>
<target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" /> <target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" />
@ -530,6 +542,11 @@
<target name="sting.compile" depends="gatk.compile, scala.compile" /> <target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="R.public.tar">
<mkdir dir="${R.tar.dir}/${R.package.path}" />
<tar compression="gzip" basedir="${R.public.src.dir}/${R.package.path}" includes="gsalib/**" destfile="${R.tar.dir}/${R.package.path}/gsalib.tar.gz" />
</target>
<target name="init.jar" depends="sting.compile,extracthelp"> <target name="init.jar" depends="sting.compile,extracthelp">
<mkdir dir="${dist.dir}"/> <mkdir dir="${dist.dir}"/>
<copy todir="${dist.dir}"> <copy todir="${dist.dir}">
@ -537,7 +554,7 @@
</copy> </copy>
</target> </target>
<target name="sting-utils.jar" depends="gatk.compile, init.jar"> <target name="sting-utils.jar" depends="gatk.compile, init.jar, R.public.tar">
<jar jarfile="${dist.dir}/StingUtils.jar"> <jar jarfile="${dist.dir}/StingUtils.jar">
<fileset dir="${java.classes}"> <fileset dir="${java.classes}">
<include name="**/utils/**/*.class"/> <include name="**/utils/**/*.class"/>
@ -549,6 +566,15 @@
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/> <fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/> <fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/> <fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
<fileset dir="${R.tar.dir}">
<include name="**/${R.package.path}/**/*.tar.gz"/>
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/utils/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/utils/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" /> <attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" />
</manifest> </manifest>
@ -577,6 +603,14 @@
<include name="**/gatk/**/*.class" /> <include name="**/gatk/**/*.class" />
<include name="**/alignment/**/*.class"/> <include name="**/alignment/**/*.class"/>
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" /> <attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
</manifest> </manifest>
@ -591,6 +625,14 @@
<include name="**/analyzecovariates/**/*.class" /> <include name="**/analyzecovariates/**/*.class" />
<include name="**/gatk/walkers/recalibration/*.class" /> <include name="**/gatk/walkers/recalibration/*.class" />
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" /> <attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
</manifest> </manifest>
@ -603,28 +645,7 @@
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>
<!--
<target name="gatk.oneoffs.jar" depends="gatk.compile, init.jar"
description="generate the GATK oneoffs distribution" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.oneoffprojects.tools.CompareBAMAlignments" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.SliceBams" />
</manifest>
</jar>
</target>
-->
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include"> <target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
<jar jarfile="${dist.dir}/GATKScala.jar"> <jar jarfile="${dist.dir}/GATKScala.jar">
<fileset dir="${scala.classes}"> <fileset dir="${scala.classes}">
@ -641,6 +662,12 @@
<fileset dir="${java.classes}"> <fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class" /> <include name="org/broadinstitute/sting/queue/**/*.class" />
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" /> <attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
</manifest> </manifest>
@ -680,20 +707,7 @@
</jar> </jar>
</target> </target>
<!--
<target name="gatk.oneoffs.manifests" depends="gatk.oneoffs.jar, init.manifests" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
-->
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include"> <target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
<jar jarfile="${dist.dir}/Queue.jar" update="true" > <jar jarfile="${dist.dir}/Queue.jar" update="true" >
<manifest> <manifest>
@ -778,10 +792,6 @@
<pathelement location="${testng.jar}"/> <pathelement location="${testng.jar}"/>
</classpath> </classpath>
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac> </javac>
</target> </target>
@ -798,10 +808,6 @@
<pathelement location="${testng.jar}"/> <pathelement location="${testng.jar}"/>
</classpath> </classpath>
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac> </javac>
</target> </target>
@ -849,6 +855,9 @@
<pathelement location="${java.private.test.classes}" /> <pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" /> <pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" /> <pathelement location="${scala.private.test.classes}" />
<pathelement location="${R.tar.dir}" />
<pathelement location="${R.public.scripts.dir}" />
<pathelement location="${R.private.scripts.dir}" />
</path> </path>
<path id="testng.gatk.releasetest.classpath"> <path id="testng.gatk.releasetest.classpath">
@ -1185,19 +1194,18 @@
</target> </target>
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs"> <target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<delete dir="out"/>
<delete dir="${build.dir}"/> <delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/> <delete dir="${lib.dir}"/>
<delete dir="dump"/> <delete dir="${contract.dump.dir}"/>
<delete dir="${staging.dir}"/> <delete dir="${staging.dir}"/>
<delete dir="${dist.dir}"/> <delete dir="${dist.dir}"/>
<delete dir="pipelinetests"/> <delete dir="${pipelinetest.dir}"/>
</target> </target>
<!-- Build gsalib R module --> <!-- Build gsalib R module -->
<target name="gsalib"> <target name="gsalib">
<exec executable="R" failonerror="true"> <exec executable="R" failonerror="true">
<arg line="R CMD INSTALL -l public/R/ public/R/src/gsalib/" /> <arg line="R CMD INSTALL -l ${R.library.dir} ${R.public.src.dir}/${R.package.path}/gsalib" />
</exec> </exec>
</target> </target>
</project> </project>

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
verbose = TRUE verbose = TRUE
@ -47,6 +49,9 @@ if( is.numeric(c$Covariate) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot mean quality versus the covariate # Plot mean quality versus the covariate
@ -69,6 +74,10 @@ if( is.numeric(c$Covariate) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot histogram of the covariate # Plot histogram of the covariate
# #
@ -106,3 +115,7 @@ if( is.numeric(c$Covariate) ) {
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
input = args[1] input = args[1]
@ -33,6 +35,10 @@ points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
abline(0,1, lty=2) abline(0,1, lty=2)
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot Q empirical histogram # Plot Q empirical histogram
# #
@ -52,6 +58,10 @@ points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot Q reported histogram # Plot Q reported histogram
# #
@ -68,3 +78,7 @@ plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yM
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
verbose = TRUE verbose = TRUE
@ -85,3 +87,7 @@ if ( ! is.null(sensitivity) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -12,20 +12,20 @@ if ( onCMDLine ) {
inputFileName = args[1] inputFileName = args[1]
outputPDF = args[2] outputPDF = args[2]
} else { } else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" inputFileName = "~/Desktop/broadLocal/GATK/unstable/wgs.jobreport.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA outputPDF = NA
} }
RUNTIME_UNITS = "(sec)" RUNTIME_UNITS = "(hours)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000 ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# #
# Helper function to aggregate all of the jobs in the report across all tables # Helper function to aggregate all of the jobs in the report across all tables
# #
allJobsFromReport <- function(report) { allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts") names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime")
sub <- lapply(report, function(table) table[,names]) sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub) do.call("rbind", sub)
} }
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
# #
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job # Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
# #
plotJobsGantt <- function(gatkReport, sortOverall) { plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
allJobs = allJobsFromReport(gatkReport) allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) { if ( sortOverall ) {
title = "All jobs, by analysis, by start time" title = "All jobs, by analysis, by start time"
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
} }
allJobs$index = 1:nrow(allJobs) allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime) minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts) allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime) maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName)) p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm"))) p <- p + theme_bw()
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
if ( includeText )
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1) p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS)) p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS))
p <- p + ylab("Job") p <- p + ylab("Job number")
p <- p + opts(title=title) p <- p + opts(title=title)
print(p) print(p)
} }
@ -119,7 +121,7 @@ plotGroup <- function(groupTable) {
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations? # todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram() p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs") p <- p + xlab(paste("runtime", RUNTIME_UNITS)) + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name)) p <- p + opts(title=paste("Job runtime histogram for", name))
print(p) print(p)
} }
@ -139,9 +141,9 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) { convertUnits <- function(gatkReportData) {
convertGroup <- function(g) { convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g g
} }
lapply(gatkReportData, convertGroup) lapply(gatkReportData, convertGroup)
@ -157,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11) pdf(outputPDF, height=8.5, width=11)
} }
plotJobsGantt(gatkReportData, T) plotJobsGantt(gatkReportData, T, F)
plotJobsGantt(gatkReportData, F) plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
plotGroup(group) plotGroup(group)

View File

@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
} }
gatkreport = as.list(tableEnv); gatkreport = as.list(tableEnv, all.names=TRUE);
} }

View File

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment; package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator; import java.util.Iterator;
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of reads aligned by this map (aka 1). * @return Number of reads aligned by this map (aka 1).
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName())); //logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of alignments found for this read. * @return Number of alignments found for this read.
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
SAMRecord alignedRead = aligner.align(read,header); SAMRecord alignedRead = aligner.align(read,header);
out.addAlignment(alignedRead); out.addAlignment(alignedRead);
return 1; return 1;

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment; package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Iterator; import java.util.Iterator;
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of alignments found for this read. * @return Number of alignments found for this read.
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
if(alignmentIterator.hasNext()) { if(alignmentIterator.hasNext()) {
int numAlignments = alignmentIterator.next().length; int numAlignments = alignmentIterator.next().length;

View File

@ -25,6 +25,9 @@
package org.broadinstitute.sting.analyzecovariates; package org.broadinstitute.sting.analyzecovariates;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.CommandLineProgram;
@ -33,14 +36,16 @@ import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*; import java.io.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -71,15 +76,13 @@ import java.util.regex.Pattern;
* </ul> * </ul>
* *
* <p> * <p>
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options * NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R, * See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
* *
* <p> * <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots. * See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration * <a target="gatkwiki" href="http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration"
* >http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration</a>
* *
* <h2>Input</h2> * <h2>Input</h2>
* <p> * <p>
@ -91,7 +94,6 @@ import java.util.regex.Pattern;
* java -Xmx4g -jar AnalyzeCovariates.jar \ * java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \ * -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \ * -outputDir /path/to/output_dir/ \
* -resources resources/ \
* -ignoreQ 5 * -ignoreQ 5
* </pre> * </pre>
* *
@ -101,6 +103,11 @@ import java.util.regex.Pattern;
groupName = "AnalyzeCovariates", groupName = "AnalyzeCovariates",
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram { public class AnalyzeCovariates extends CommandLineProgram {
final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class);
private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R";
private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R";
private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R";
///////////////////////////// /////////////////////////////
// Command Line Arguments // Command Line Arguments
@ -114,11 +121,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv"; private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/"; private File OUTPUT_DIR = new File("analyzeCovariates");
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5; private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
@ -154,29 +157,26 @@ public class AnalyzeCovariates extends CommandLineProgram {
protected int execute() { protected int execute() {
// create the output directory where all the data tables and plots will go // create the output directory where all the data tables and plots will go
try { if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs())
Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR); throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to create output directory: " + OUTPUT_DIR);
} catch (IOException e) {
System.out.println("Couldn't create directory: " + OUTPUT_DIR); if (!RScriptExecutor.RSCRIPT_EXISTS)
System.out.println("User is responsible for making sure the output directory exists."); Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated.");
}
if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
// initialize all the data from the csv file and allocate the list of covariates // initialize all the data from the csv file and allocate the list of covariates
System.out.println("Reading in input csv file..."); logger.info("Reading in input csv file...");
initializeData(); initializeData();
System.out.println("...Done!"); logger.info("...Done!");
// output data tables for Rscript to read in // output data tables for Rscript to read in
System.out.println("Writing out intermediate tables for R..."); logger.info("Writing out intermediate tables for R...");
writeDataTables(); writeDataTables();
System.out.println("...Done!"); logger.info("...Done!");
// perform the analysis using Rscript and output the plots // perform the analysis using Rscript and output the plots
System.out.println("Calling analysis R scripts and writing out figures..."); logger.info("Calling analysis R scripts and writing out figures...");
callRScripts(); callRScripts();
System.out.println("...Done!"); logger.info("...Done!");
return 0; return 0;
} }
@ -287,37 +287,40 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) { if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
String readGroup = readGroupKey.toString(); String readGroup = readGroupKey.toString();
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey); RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" ); logger.info(String.format(
System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported())); "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
readGroup, readGroupDatum.getNumObservations(),
readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
// for each covariate // for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii); Covariate cov = requestedCovariates.get(iii);
// Create a PrintStream // Create a PrintStream
PrintStream output = null; File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
PrintStream output;
try { try {
output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat")); output = new PrintStream(FileUtils.openOutputStream(outputFile));
} catch (IOException e) {
} catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(outputFile, e);
System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
System.exit(-1);
} }
// Output the header try {
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases"); // Output the header
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) { for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
output.print( covariateKey.toString() + "\t" ); // Covariate output.print( covariateKey.toString() + "\t" ); // Covariate
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey); RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
output.println( thisDatum.getNumObservations() ); // nBases output.println( thisDatum.getNumObservations() ); // nBases
}
} finally {
// Close the PrintStream
IOUtils.closeQuietly(output);
} }
// Close the PrintStream
output.close();
} }
} else { } else {
break; break;
@ -327,10 +330,6 @@ public class AnalyzeCovariates extends CommandLineProgram {
} }
private void callRScripts() { private void callRScripts() {
RScriptExecutor.RScriptArgumentCollection argumentCollection =
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0; int numReadGroups = 0;
// for each read group // for each read group
@ -338,23 +337,32 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString(); String readGroup = readGroupKey.toString();
System.out.println("Analyzing read group: " + readGroup); logger.info("Analyzing read group: " + readGroup);
// for each covariate // for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii); Covariate cov = requestedCovariates.get(iii);
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"; final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
if (DO_INDEL_QUALITY) { if (DO_INDEL_QUALITY) {
executor.callRScripts("plot_indelQuality.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
} else { } else {
if( iii == 1 ) { if( iii == 1 ) {
// Analyze reported quality // Analyze reported quality
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class));
// The second argument is the Q scores that should be turned pink in the plot because they were ignored
executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE);
executor.exec();
} else { // Analyze all other covariates } else { // Analyze all other covariates
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
} }
} }
} }

View File

@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
/** /**
* Maps indices of command line arguments to values paired with that argument. * Maps indices of command line arguments to values paired with that argument.
*/ */
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>(); public final SortedMap<ArgumentMatchSite,List<String>> sites = new TreeMap<ArgumentMatchSite,List<String>>();
/** /**
* An ordered, freeform collection of tags. * An ordered, freeform collection of tags.
@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
} }
/** /**
* A simple way of indicating that an argument with the given label and definition exists at this index. * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null. * @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null. * @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null. * @param site Position of the argument. Must not be null.
* @param tags ordered freeform text tags associated with this argument. * @param tags ordered freeform text tags associated with this argument.
*/ */
public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) { public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) {
this( label, definition, index, null, tags ); this( label, definition, site, null, tags );
} }
/** /**
* A simple way of indicating that an argument with the given label and definition exists at this index. * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null. * @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null. * @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null. * @param site Position of the argument. Must not be null.
* @param value Value for the argument at this position. * @param value Value for the argument at this position.
* @param tags ordered freeform text tags associated with this argument. * @param tags ordered freeform text tags associated with this argument.
*/ */
private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) { private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) {
this.label = label; this.label = label;
this.definition = definition; this.definition = definition;
ArrayList<String> values = new ArrayList<String>(); ArrayList<String> values = new ArrayList<String>();
if( value != null ) if( value != null )
values.add(value); values.add(value);
indices.put(index,values ); sites.put(site,values );
this.tags = tags; this.tags = tags;
} }
@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
ArgumentMatch otherArgumentMatch = (ArgumentMatch)other; ArgumentMatch otherArgumentMatch = (ArgumentMatch)other;
return this.definition.equals(otherArgumentMatch.definition) && return this.definition.equals(otherArgumentMatch.definition) &&
this.label.equals(otherArgumentMatch.label) && this.label.equals(otherArgumentMatch.label) &&
this.indices.equals(otherArgumentMatch.indices) && this.sites.equals(otherArgumentMatch.sites) &&
this.tags.equals(otherArgumentMatch.tags); this.tags.equals(otherArgumentMatch.tags);
} }
@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param key Key which specifies the transform. * @param key Key which specifies the transform.
* @return A variant of this ArgumentMatch with all keys transformed. * @return A variant of this ArgumentMatch with all keys transformed.
*/ */
@SuppressWarnings("unchecked")
ArgumentMatch transform(Multiplexer multiplexer, Object key) { ArgumentMatch transform(Multiplexer multiplexer, Object key) {
SortedMap<Integer,List<String>> newIndices = new TreeMap<Integer,List<String>>(); SortedMap<ArgumentMatchSite,List<String>> newIndices = new TreeMap<ArgumentMatchSite,List<String>>();
for(Map.Entry<Integer,List<String>> index: indices.entrySet()) { for(Map.Entry<ArgumentMatchSite,List<String>> site: sites.entrySet()) {
List<String> newEntries = new ArrayList<String>(); List<String> newEntries = new ArrayList<String>();
for(String entry: index.getValue()) for(String entry: site.getValue())
newEntries.add(multiplexer.transformArgument(key,entry)); newEntries.add(multiplexer.transformArgument(key,entry));
newIndices.put(index.getKey(),newEntries); newIndices.put(site.getKey(),newEntries);
} }
ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition);
newArgumentMatch.indices.putAll(newIndices); newArgumentMatch.sites.putAll(newIndices);
return newArgumentMatch; return newArgumentMatch;
} }
@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
public Iterator<ArgumentMatch> iterator() { public Iterator<ArgumentMatch> iterator() {
return new Iterator<ArgumentMatch>() { return new Iterator<ArgumentMatch>() {
/** /**
* Iterate over each the available index. * Iterate over each the available site.
*/ */
private Iterator<Integer> indexIterator = null; private Iterator<ArgumentMatchSite> siteIterator = null;
/** /**
* Iterate over each available token. * Iterate over each available token.
@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
private Iterator<String> tokenIterator = null; private Iterator<String> tokenIterator = null;
/** /**
* The next index to return. Null if none remain. * The next site to return. Null if none remain.
*/ */
Integer nextIndex = null; ArgumentMatchSite nextSite = null;
/** /**
* The next token to return. Null if none remain. * The next token to return. Null if none remain.
@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
String nextToken = null; String nextToken = null;
{ {
indexIterator = indices.keySet().iterator(); siteIterator = sites.keySet().iterator();
prepareNext(); prepareNext();
} }
@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return True if there's another token waiting in the wings. False otherwise. * @return True if there's another token waiting in the wings. False otherwise.
*/ */
public boolean hasNext() { public boolean hasNext() {
return nextToken != null; return nextToken != null;
} }
/** /**
@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return The next ArgumentMatch in the series. Should never be null. * @return The next ArgumentMatch in the series. Should never be null.
*/ */
public ArgumentMatch next() { public ArgumentMatch next() {
if( nextIndex == null || nextToken == null ) if( nextSite == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" ); throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags ); ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags );
prepareNext(); prepareNext();
return match; return match;
} }
/** /**
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
* initialize nextIndex / nextToken to null. * initialize nextSite / nextToken to null.
*/ */
private void prepareNext() { private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) { if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next(); nextToken = tokenIterator.next();
} }
else { else {
nextIndex = null; nextSite = null;
nextToken = null; nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data. // Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data. // Otherwise, troll the outer iterator looking for more data.
while( indexIterator.hasNext() ) { while( siteIterator.hasNext() ) {
nextIndex = indexIterator.next(); nextSite = siteIterator.next();
if( indices.get(nextIndex) != null ) { if( sites.get(nextSite) != null ) {
tokenIterator = indices.get(nextIndex).iterator(); tokenIterator = sites.get(nextSite).iterator();
if( tokenIterator.hasNext() ) { if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next(); nextToken = tokenIterator.next();
break; break;
@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param other The other match to merge into. * @param other The other match to merge into.
*/ */
public void mergeInto( ArgumentMatch other ) { public void mergeInto( ArgumentMatch other ) {
indices.putAll(other.indices); sites.putAll(other.sites);
} }
/** /**
* Associate a value with this merge maapping. * Associate a value with this merge maapping.
* @param index index of the command-line argument to which this value is mated. * @param site site of the command-line argument to which this value is mated.
* @param value Text representation of value to add. * @param value Text representation of value to add.
*/ */
public void addValue( int index, String value ) { public void addValue( ArgumentMatchSite site, String value ) {
if( !indices.containsKey(index) || indices.get(index) == null ) if( !sites.containsKey(site) || sites.get(site) == null )
indices.put(index, new ArrayList<String>() ); sites.put(site, new ArrayList<String>() );
indices.get(index).add(value); sites.get(site).add(value);
} }
/** /**
* Does this argument already have a value at the given site? * Does this argument already have a value at the given site?
* Arguments are only allowed to be single-valued per site, and * Arguments are only allowed to be single-valued per site, and
* flags aren't allowed a value at all. * flags aren't allowed a value at all.
* @param index Index at which to check for values. * @param site Site at which to check for values.
* @return True if the argument has a value at the given site. False otherwise. * @return True if the argument has a value at the given site. False otherwise.
*/ */
public boolean hasValueAtSite( int index ) { public boolean hasValueAtSite( ArgumentMatchSite site ) {
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag();
} }
/** /**
@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
*/ */
public List<String> values() { public List<String> values() {
List<String> values = new ArrayList<String>(); List<String> values = new ArrayList<String>();
for( int index: indices.keySet() ) { for( ArgumentMatchSite site: sites.keySet() ) {
if( indices.get(index) != null ) if( sites.get(site) != null )
values.addAll(indices.get(index)); values.addAll(sites.get(site));
} }
return values; return values;
} }

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Which source and the index within the source where an argument match was found.
 *
 * Instances are immutable. Ordering sorts first by source, then by index.
 */
public class ArgumentMatchSite implements Comparable<ArgumentMatchSite> {
    private final ArgumentMatchSource source;
    private final int index;

    /**
     * @param source Where the match came from (command line or file). May be null.
     * @param index Zero-based token index within the source.
     */
    public ArgumentMatchSite(ArgumentMatchSource source, int index) {
        this.source = source;
        this.index = index;
    }

    public ArgumentMatchSource getSource() {
        return source;
    }

    public int getIndex() {
        return index;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        ArgumentMatchSite that = (ArgumentMatchSite) o;
        return (index == that.index) && (source == null ? that.source == null : source.equals(that.source));
    }

    @Override
    public int hashCode() {
        int result = source != null ? source.hashCode() : 0;
        // Generated by intellij. No other special reason to this implementation. -ks
        result = 31 * result + index;
        return result;
    }

    @Override
    public int compareTo(ArgumentMatchSite that) {
        // Bug fix: equals()/hashCode() tolerate a null source, but compareTo()
        // previously dereferenced it unconditionally. Keep the ordering
        // consistent with equals by sorting null sources first.
        int comp;
        if (this.source == null)
            comp = (that.source == null) ? 0 : -1;
        else if (that.source == null)
            comp = 1;
        else
            comp = this.source.compareTo(that.source);
        if (comp != 0)
            return comp;

        // Same source: order by index. Explicit comparisons (rather than
        // subtraction) avoid integer-overflow surprises.
        if (this.index == that.index)
            return 0;
        return this.index < that.index ? -1 : 1;
    }
}

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.io.File;
/**
 * Where an argument match originated, via the commandline or a file.
 */
public class ArgumentMatchSource implements Comparable<ArgumentMatchSource> {
    /** The single shared source representing arguments typed on the command line. */
    public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null);

    private final ArgumentMatchSourceType type;
    private final File file;

    /**
     * Creates an argument match source from the specified file.
     * @param file File specifying the arguments. Must not be null.
     */
    public ArgumentMatchSource(File file) {
        this(ArgumentMatchSourceType.File, file);
    }

    private ArgumentMatchSource(ArgumentMatchSourceType type, File file) {
        if (type == ArgumentMatchSourceType.File && file == null)
            throw new IllegalArgumentException("An argument match source of type File cannot have a null file.");
        this.type = type;
        this.file = file;
    }

    public ArgumentMatchSourceType getType() {
        return type;
    }

    public File getFile() {
        return file;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this)
            return true;
        if (o == null || o.getClass() != getClass())
            return false;

        ArgumentMatchSource other = (ArgumentMatchSource) o;
        if (type != other.type)
            return false;
        return file == null ? other.file == null : file.equals(other.file);
    }

    @Override
    public int hashCode() {
        // Same value as the conventional 31-based two-field hash.
        int typeHash = (type != null) ? type.hashCode() : 0;
        int fileHash = (file != null) ? file.hashCode() : 0;
        return 31 * typeHash + fileHash;
    }

    /**
     * Compares two sources, putting the command line first, then files.
     */
    @Override
    public int compareTo(ArgumentMatchSource that) {
        int byType = this.type.compareTo(that.type);
        if (byType != 0)
            return byType;

        // Types match; break the tie on the file, sorting a null file first.
        boolean mineMissing = (this.file == null);
        boolean theirsMissing = (that.file == null);
        if (mineMissing && theirsMissing)
            return 0;
        if (mineMissing)
            return -1;
        if (theirsMissing)
            return 1;
        return this.file.compareTo(that.file);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Type of where an argument match originated, via the commandline or a file.
 */
public enum ArgumentMatchSourceType {
    /** The argument was supplied directly on the command line. */
    CommandLine,

    /** The argument was read from an argument file. */
    File
}

View File

@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* Collection matches from argument definition to argument value. * Collection matches from argument definition to argument value.
* Package protected access is deliberate. * Package protected access is deliberate.
*/ */
Map<Integer,ArgumentMatch> argumentMatches = new TreeMap<Integer,ArgumentMatch>(); Map<ArgumentMatchSite,ArgumentMatch> argumentMatches = new TreeMap<ArgumentMatchSite,ArgumentMatch>();
/** /**
* Provide a place to put command-line argument values that don't seem to belong to * Provide a place to put command-line argument values that don't seem to belong to
@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @param site Site at which to check. * @param site Site at which to check.
* @return True if the site has a match. False otherwise. * @return True if the site has a match. False otherwise.
*/ */
boolean hasMatch( int site ) { boolean hasMatch( ArgumentMatchSite site ) {
return argumentMatches.containsKey( site ); return argumentMatches.containsKey( site );
} }
@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @return The match present at the given site. * @return The match present at the given site.
* @throws IllegalArgumentException if site does not contain a match. * @throws IllegalArgumentException if site does not contain a match.
*/ */
ArgumentMatch getMatch( int site ) { ArgumentMatch getMatch( ArgumentMatchSite site ) {
if( !argumentMatches.containsKey(site) ) if( !argumentMatches.containsKey(site) )
throw new IllegalArgumentException( "Site does not contain an argument: " + site ); throw new IllegalArgumentException( "Site does not contain an argument: " + site );
return argumentMatches.get(site); return argumentMatches.get(site);
@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
/** /**
* Return all argument matches of this source. * Return all argument matches of this source.
* @param parsingEngine Parsing engine.
* @param argumentSource Argument source to match. * @param argumentSource Argument source to match.
* @return List of all matches. * @return List of all matches.
*/ */
@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* TODO: Generify this. * TODO: Generify this.
* @param multiplexer Multiplexer that controls the transformation process. * @param multiplexer Multiplexer that controls the transformation process.
* @param key Key which specifies the transform. * @param key Key which specifies the transform.
* @return new argument matches.
*/ */
ArgumentMatches transform(Multiplexer multiplexer, Object key) { ArgumentMatches transform(Multiplexer multiplexer, Object key) {
ArgumentMatches newArgumentMatches = new ArgumentMatches(); ArgumentMatches newArgumentMatches = new ArgumentMatches();
@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
for( ArgumentMatch argumentMatch: getUniqueMatches() ) { for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) { if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
argumentMatch.mergeInto( match ); argumentMatch.mergeInto( match );
for( int index: match.indices.keySet() ) for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( index, argumentMatch ); argumentMatches.put( site, argumentMatch );
definitionExists = true; definitionExists = true;
} }
} }
if( !definitionExists ) { if( !definitionExists ) {
for( int index: match.indices.keySet() ) for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( index, match ); argumentMatches.put( site, match );
} }
} }

View File

@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return parse(parsingEngine, source, type, matches, false);
}
/**
* The actual argument parsing method.
*
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
* continue to input them out of order (whereas Tribble Features are ordered).
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches ); String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type); Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( tags.getPositionalTags().size() > 2 ) { if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException( throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " + String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only suport -X:type and -X:name,type argument styles", "Rod bindings only support -X:type and -X:name,type argument styles",
value, source.field.getName())); value, source.field.getName()));
} if ( tags.getPositionalTags().size() == 2 ) { } if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style // -X:name,type style
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
} }
if ( tribbleType == null ) if ( tribbleType == null ) {
// IntervalBindings allow streaming conversion of Strings
if ( createIntervalBinding ) {
return new IntervalBinding(value);
}
if ( ! file.exists() ) { if ( ! file.exists() ) {
throw new UserException.CouldNotReadInputFile(file, "file does not exist"); throw new UserException.CouldNotReadInputFile(file, "file does not exist");
} else if ( ! file.canRead() || ! file.isFile() ) { } else if ( ! file.canRead() || ! file.isFile() ) {
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType))); manager.userFriendlyListOfAvailableFeatures(parameterType)));
} }
}
} }
} }
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); Object result;
parsingEngine.addTags(result,tags); if ( createIntervalBinding ) {
parsingEngine.addRodBinding(result); result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
} else {
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
result = rbind;
}
return result; return result;
} catch (InvocationTargetException e) { } catch (InvocationTargetException e) {
throw new UserException.CommandLineException( throw new UserException.CommandLineException(
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
} }
/**
* Parser for RodBinding objects
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want IntervalBinding class objects
* @param type The type to check.
* @return true if the provided class is an IntervalBinding.class
*/
@Override
public boolean supports( Class type ) {
return isIntervalBinding(type);
}
public static boolean isIntervalBinding( Class type ) {
return IntervalBinding.class.isAssignableFrom(type);
}
/**
* See note from RodBindingArgumentTypeDescriptor.parse().
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @return the IntervalBinding object.
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
}
}
/** /**
* Parse simple argument types: java primitives, wrapper classes, and anything that has * Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor. * a simple String constructor.
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public boolean supports( Class type ) { public boolean supports( Class type ) {
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false; if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
if ( type.isPrimitive() ) return true; if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true; if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true; if ( primitiveToWrapperMap.containsValue(type) ) return true;

View File

@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter; import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.*;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Locale;
public abstract class CommandLineProgram { public abstract class CommandLineProgram {
@ -155,6 +152,7 @@ public abstract class CommandLineProgram {
* *
* @param clp the command line program to execute * @param clp the command line program to execute
* @param args the command line arguments passed in * @param args the command line arguments passed in
* @param dryRun dry run
* @throws Exception when an exception occurs * @throws Exception when an exception occurs
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -176,6 +174,8 @@ public abstract class CommandLineProgram {
ParsingEngine parser = clp.parser = new ParsingEngine(clp); ParsingEngine parser = clp.parser = new ParsingEngine(clp);
parser.addArgumentSource(clp.getClass()); parser.addArgumentSource(clp.getClass());
Map<ArgumentMatchSource, List<String>> parsedArgs;
// process the args // process the args
if (clp.canAddArgumentsDynamically()) { if (clp.canAddArgumentsDynamically()) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments. // if the command-line program can toss in extra args, fetch them and reparse the arguments.
@ -196,14 +196,14 @@ public abstract class CommandLineProgram {
Class[] argumentSources = clp.getArgumentSources(); Class[] argumentSources = clp.getArgumentSources();
for (Class argumentSource : argumentSources) for (Class argumentSource : argumentSources)
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parser.parse(args); parsedArgs = parser.parse(args);
if (isHelpPresent(parser)) if (isHelpPresent(parser))
printHelpAndExit(clp, parser); printHelpAndExit(clp, parser);
if ( ! dryRun ) parser.validate(); if ( ! dryRun ) parser.validate();
} else { } else {
parser.parse(args); parsedArgs = parser.parse(args);
if ( ! dryRun ) { if ( ! dryRun ) {
if (isHelpPresent(parser)) if (isHelpPresent(parser))
@ -230,7 +230,7 @@ public abstract class CommandLineProgram {
} }
// regardless of what happens next, generate the header information // regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs);
// call the execute // call the execute
CommandLineProgram.result = clp.execute(); CommandLineProgram.result = clp.execute();

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
/**
 * An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
 *
 * The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
 * the engine to construct intervals for traversal at runtime. The IntervalBinding can
 * either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
 * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
 *
 * Note that this class is immutable.
 */
public final class IntervalBinding<T extends Feature> {
    // Exactly one of the two fields below is non-null, selected by which
    // constructor ran. Both are final to back the immutability claim above.
    private final RodBinding<T> featureIntervals;
    private final String stringIntervals;

    @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
    public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
        featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
        stringIntervals = null;
    }

    @Requires({"intervalArgument != null"})
    public IntervalBinding(String intervalArgument) {
        featureIntervals = null;
        stringIntervals = intervalArgument;
    }

    /**
     * @return The raw source of the intervals: the bound feature file's source
     *         when this is a ROD binding, otherwise the interval string itself.
     */
    public String getSource() {
        if ( featureIntervals != null )
            return featureIntervals.getSource();
        return stringIntervals;
    }

    /**
     * Resolves this binding into concrete genome locations.
     *
     * @param toolkit Engine supplying the genome-loc parser used to build locations.
     * @return Intervals decoded from the bound feature file, or parsed from the
     *         interval string argument(s).
     * @throws UserException if the interval file cannot be read.
     */
    public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
        List<GenomeLoc> intervals;

        if ( featureIntervals != null ) {
            intervals = new ArrayList<GenomeLoc>();

            // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
            FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
            if ( codec instanceof ReferenceDependentFeatureCodec )
                ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());

            FileInputStream fis = null;
            try {
                fis = new FileInputStream(new File(featureIntervals.getSource()));
                AsciiLineReader lineReader = new AsciiLineReader(fis);
                codec.readHeader(lineReader);

                String line = lineReader.readLine();
                while ( line != null ) {
                    intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(codec.decodeLoc(line)));
                    line = lineReader.readLine();
                }
            } catch (IOException e) {
                throw new UserException("Problem reading the interval file " + featureIntervals.getSource() + "; " + e.getMessage());
            } finally {
                // Bug fix: the stream was previously never closed, leaking a
                // file descriptor on every call (including the error path).
                if ( fis != null ) {
                    try {
                        fis.close();
                    } catch (IOException e) {
                        // Best-effort close; the intervals were already read.
                    }
                }
            }
        } else {
            intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
        }

        return intervals;
    }
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter; import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.util.*; import java.util.*;
@ -75,6 +78,7 @@ public class ParsingEngine {
* The type of set used must be ordered (but not necessarily sorted). * The type of set used must be ordered (but not necessarily sorted).
*/ */
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(), private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new IntervalBindingArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(), new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) ); new MultiplexArgumentTypeDescriptor()) );
@ -100,6 +104,8 @@ public class ParsingEngine {
if(clp != null) if(clp != null)
argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors());
argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS);
addArgumentSource(ParsingEngineArgumentFiles.class);
} }
/** /**
@ -148,21 +154,43 @@ public class ParsingEngine {
* command-line arguments to the arguments that are actually * command-line arguments to the arguments that are actually
* required. * required.
* @param tokens Tokens passed on the command line. * @param tokens Tokens passed on the command line.
* @return The parsed arguments by file.
*/ */
public void parse( String[] tokens ) { public SortedMap<ArgumentMatchSource, List<String>> parse( String[] tokens ) {
argumentMatches = new ArgumentMatches(); argumentMatches = new ArgumentMatches();
SortedMap<ArgumentMatchSource, List<String>> parsedArgs = new TreeMap<ArgumentMatchSource, List<String>>();
int lastArgumentMatchSite = -1; List<String> cmdLineTokens = Arrays.asList(tokens);
parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs);
for( int i = 0; i < tokens.length; i++ ) { ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles();
String token = tokens[i];
// Load the arguments ONLY into the argument files.
// Validation may optionally run on the rest of the arguments.
loadArgumentsIntoObject(argumentFiles);
for (File file: argumentFiles.files) {
List<String> fileTokens = getArguments(file);
parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs);
}
return parsedArgs;
}
private void parse(ArgumentMatchSource matchSource, List<String> tokens,
ArgumentMatches argumentMatches, SortedMap<ArgumentMatchSource, List<String>> parsedArgs) {
ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1);
int i = 0;
for (String token: tokens) {
// If the token is of argument form, parse it into its own argument match. // If the token is of argument form, parse it into its own argument match.
// Otherwise, pair it with the most recently used argument discovered. // Otherwise, pair it with the most recently used argument discovered.
ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i);
if( isArgumentForm(token) ) { if( isArgumentForm(token) ) {
ArgumentMatch argumentMatch = parseArgument( token, i ); ArgumentMatch argumentMatch = parseArgument( token, site );
if( argumentMatch != null ) { if( argumentMatch != null ) {
argumentMatches.mergeInto( argumentMatch ); argumentMatches.mergeInto( argumentMatch );
lastArgumentMatchSite = i; lastArgumentMatchSite = site;
} }
} }
else { else {
@ -170,10 +198,31 @@ public class ParsingEngine {
!argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite))
argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token );
else else
argumentMatches.MissingArgument.addValue( i, token ); argumentMatches.MissingArgument.addValue( site, token );
} }
i++;
} }
parsedArgs.put(matchSource, tokens);
}
private List<String> getArguments(File file) {
try {
if (file.getAbsolutePath().endsWith(".list")) {
return getListArguments(file);
}
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, e);
}
throw new UserException.CouldNotReadInputFile(file, "file extension is not .list");
}
private List<String> getListArguments(File file) throws IOException {
ArrayList<String> argsList = new ArrayList<String>();
for (String line: FileUtils.readLines(file))
argsList.addAll(Arrays.asList(Utils.escapeExpressions(line)));
return argsList;
} }
public enum ValidationType { MissingRequiredArgument, public enum ValidationType { MissingRequiredArgument,
@ -494,7 +543,7 @@ public class ParsingEngine {
* @param position The position of the token in question. * @param position The position of the token in question.
* @return ArgumentMatch associated with this token, or null if no match exists. * @return ArgumentMatch associated with this token, or null if no match exists.
*/ */
private ArgumentMatch parseArgument( String token, int position ) { private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) {
if( !isArgumentForm(token) ) if( !isArgumentForm(token) )
throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token ); throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
@ -579,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException {
private static String formatArguments( ArgumentMatch invalidValues ) { private static String formatArguments( ArgumentMatch invalidValues ) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for( int index: invalidValues.indices.keySet() ) for( ArgumentMatchSite site: invalidValues.sites.keySet() )
for( String value: invalidValues.indices.get(index) ) { for( String value: invalidValues.sites.get(site) ) {
sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) ); switch (site.getSource().getType()) {
case CommandLine:
sb.append( String.format("%nInvalid argument value '%s' at position %d.",
value, site.getIndex()) );
break;
case File:
sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.",
value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) );
break;
default:
throw new RuntimeException( String.format("Unexpected argument match source type: %s",
site.getSource().getType()));
}
if(value != null && Utils.dupString(' ',value.length()).equals(value)) if(value != null && Utils.dupString(' ',value.length()).equals(value))
sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace.");
} }
@ -634,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException {
private static String formatArguments(ArgumentDefinition definition, String argumentPassed) { private static String formatArguments(ArgumentDefinition definition, String argumentPassed) {
return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions));
} }
} }
/**
* Container class to store the list of argument files.
* The files will be parsed after the command line arguments.
*/
class ParsingEngineArgumentFiles {
@Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false)
public List<File> files = new ArrayList<File>();
}

View File

@ -68,7 +68,7 @@ public abstract class ParsingMethod {
* @return An argument match. Definition field will be populated if a match was found or * @return An argument match. Definition field will be populated if a match was found or
* empty if no appropriate definition could be found. * empty if no appropriate definition could be found.
*/ */
public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) { public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) {
// If the argument is valid, parse out the argument. // If the argument is valid, parse out the argument.
Matcher matcher = pattern.matcher(token); Matcher matcher = pattern.matcher(token);
@ -102,9 +102,7 @@ public abstract class ParsingMethod {
// Try to find a matching argument. If found, label that as the match. If not found, add the argument // Try to find a matching argument. If found, label that as the match. If not found, add the argument
// with a null definition. // with a null definition.
ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags); return new ArgumentMatch(argument,argumentDefinition,position,tags);
return argumentMatch;
} }
/** /**

View File

@ -28,34 +28,30 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -92,7 +88,7 @@ public class GenomeAnalysisEngine {
/** /**
* Accessor for sample metadata * Accessor for sample metadata
*/ */
private SampleDataSource sampleDataSource = null; private SampleDB sampleDB = null;
/** /**
* Accessor for sharded reference-ordered data. * Accessor for sharded reference-ordered data.
@ -206,6 +202,9 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal. // Prepare the data for traversal.
initializeDataSources(); initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list // initialize and validate the interval list
initializeIntervals(); initializeIntervals();
validateSuppliedIntervals(); validateSuppliedIntervals();
@ -222,12 +221,12 @@ public class GenomeAnalysisEngine {
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals); ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
// execute the microscheduler, storing the results // execute the microscheduler, storing the results
Object result = microScheduler.execute(this.walker, shardStrategy); return microScheduler.execute(this.walker, shardStrategy);
//monitor.stop(); //monitor.stop();
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed())); //logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
return result; //return result;
} }
/** /**
@ -259,13 +258,12 @@ public class GenomeAnalysisEngine {
* @return A collection of available filters. * @return A collection of available filters.
*/ */
public Collection<ReadFilter> createFilters() { public Collection<ReadFilter> createFilters() {
Set<ReadFilter> filters = new HashSet<ReadFilter>(); final List<ReadFilter> filters = WalkerManager.getReadFilters(walker,this.getFilterManager());
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0) if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList)); filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
for(String filterName: this.getArguments().readFilters) for(final String filterName: this.getArguments().readFilters)
filters.add(this.getFilterManager().createByName(filterName)); filters.add(this.getFilterManager().createByName(filterName));
return Collections.unmodifiableSet(filters); return Collections.unmodifiableList(filters);
} }
/** /**
@ -299,10 +297,14 @@ public class GenomeAnalysisEngine {
else if(WalkerManager.getDownsamplingMethod(walker) != null) else if(WalkerManager.getDownsamplingMethod(walker) != null)
method = WalkerManager.getDownsamplingMethod(walker); method = WalkerManager.getDownsamplingMethod(walker);
else else
method = argCollection.getDefaultDownsamplingMethod(); method = GATKArgumentCollection.getDefaultDownsamplingMethod();
return method; return method;
} }
protected void setDownsamplingMethod(DownsamplingMethod method) {
argCollection.setDownsamplingMethod(method);
}
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
@ -381,18 +383,18 @@ public class GenomeAnalysisEngine {
// If intervals is non-null and empty at this point, it means that the list of intervals to process // If intervals is non-null and empty at this point, it means that the list of intervals to process
// was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since // was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since
// this was very likely unintentional, the user should be informed of this. Note that this is different // this was very likely unintentional, the user should be informed of this. Note that this is different
// from the case where intervals == null, which indicates either that there were no interval arguments, // from the case where intervals == null, which indicates that there were no interval arguments.
// or that -L all was specified.
if ( intervals != null && intervals.isEmpty() ) { if ( intervals != null && intervals.isEmpty() ) {
throw new ArgumentException("The given combination of -L and -XL options results in an empty set. " + logger.warn("The given combination of -L and -XL options results in an empty set. No intervals to process.");
"No intervals to process.");
} }
} }
/** /**
* Get the sharding strategy given a driving data source. * Get the sharding strategy given a driving data source.
* *
* @param readsDataSource readsDataSource
* @param drivingDataSource Data on which to shard. * @param drivingDataSource Data on which to shard.
* @param intervals intervals
* @return the sharding strategy * @return the sharding strategy
*/ */
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) { protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
@ -429,7 +431,7 @@ public class GenomeAnalysisEngine {
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region); return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
} }
ShardStrategy shardStrategy = null; ShardStrategy shardStrategy;
ShardStrategyFactory.SHATTER_STRATEGY shardType; ShardStrategyFactory.SHATTER_STRATEGY shardType;
long SHARD_SIZE = 100000L; long SHARD_SIZE = 100000L;
@ -438,6 +440,8 @@ public class GenomeAnalysisEngine {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000; if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) { if (intervals != null && !intervals.isEmpty()) {
if (readsDataSource == null)
throw new IllegalArgumentException("readsDataSource is null");
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
@ -501,7 +505,8 @@ public class GenomeAnalysisEngine {
*/ */
private void initializeTempDirectory() { private void initializeTempDirectory() {
File tempDir = new File(System.getProperty("java.io.tmpdir")); File tempDir = new File(System.getProperty("java.io.tmpdir"));
tempDir.mkdirs(); if (!tempDir.exists() && !tempDir.mkdirs())
throw new UserException.BadTmpDir("Unable to create directory");
} }
/** /**
@ -566,34 +571,23 @@ public class GenomeAnalysisEngine {
protected void initializeIntervals() { protected void initializeIntervals() {
// return if no interval arguments at all // return if no interval arguments at all
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null)) if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return; return;
// if '-L all' was specified, verify that it was the only -L specified and return if so. // Note that the use of '-L all' is no longer supported.
if(argCollection.intervals != null) {
for(String interval: argCollection.intervals) {
if(interval.trim().equals("all")) {
if(argCollection.intervals.size() > 1)
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
// '-L all' was specified and seems valid. Return.
return;
}
}
}
// if include argument isn't given, create new set of all possible intervals // if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ? GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging))); loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
// if no exclude arguments, can return parseIntervalArguments directly // if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null) if ( argCollection.excludeIntervals == null )
intervals = includeSortedSet; intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else { else {
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null); GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet); intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given // logging messages only printed when exclude (-XL) arguments are given
@ -608,47 +602,26 @@ public class GenomeAnalysisEngine {
/** /**
* Loads the intervals relevant to the current execution * Loads the intervals relevant to the current execution
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
* notation, or a combination of the above * @param rule interval merging rule
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
* @return A sorted, merged list of all intervals specified in this arg list. * @return A sorted, merged list of all intervals specified in this arg list.
*/ */
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) { protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST || List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
argCollection.unsafe == ValidationExclusion.TYPE.ALL); for ( IntervalBinding intervalBinding : argList ) {
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList); if ( intervals.isEmpty() ) {
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule); logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
}
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
}
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging); return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
} }
/**
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
* @return ROD intervals as GenomeLocs
*/
private List<GenomeLoc> getRODIntervals() {
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
if (rodNames != null && argCollection.RODToInterval != null) {
String rodName = argCollection.RODToInterval;
// check to make sure we have a rod of that name
if (!rodNames.containsKey(rodName))
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
for (String str : rodNames.keySet())
if (str.equals(rodName)) {
logger.info("Adding interval list from track (ROD) named " + rodName);
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
ret.addAll(intervalGenerator.toGenomeLocList());
}
}
return ret;
}
/** /**
* Add additional, externally managed IO streams for inputs. * Add additional, externally managed IO streams for inputs.
* *
@ -692,12 +665,22 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters) for (ReadFilter filter : filters)
filter.initialize(this); filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
} }
/**
* Entry-point function to initialize the samples database from input data and pedigree arguments
*/
private void initializeSampleDB() {
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
sampleDB = sampleDBBuilder.getFinalSampleDB();
}
/** /**
* Gets a unique identifier for the reader sourcing this read. * Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine. * @param read Read to examine.
@ -716,106 +699,13 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id); return getReadsDataSource().getSAMFile(id);
} }
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> samples = new HashSet<String>(1);
sample_sets.add(samples);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
samples.add(g.getSample());
}
}
return sample_sets;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> libs = new HashSet<String>(2);
lib_sets.add(libs);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
libs.add(g.getLibrary());
}
}
return lib_sets;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> groups = new HashSet<String>(5);
rg_sets.add(groups);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
// use HeaderMerger to translate original read group id from the reader into the read group id in the
// merged stream, and save that remapped read group id to associate it with specific reader
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
groups.add(g.getReadGroupId());
}
}
}
return rg_sets;
}
/** /**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available). * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
* *
* @param reads Reads data source. * @param reads Reads data source.
* @param reference Reference data source. * @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks * @param rods a collection of the reference ordered data tracks
* @param manager manager
*/ */
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) { private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
@ -844,15 +734,22 @@ public class GenomeAnalysisEngine {
/** /**
* Gets a data source for the given set of reads. * Gets a data source for the given set of reads.
* *
* @param argCollection arguments
* @param genomeLocParser parser
* @param refReader reader
* @return A data source for the given set of reads. * @return A data source for the given set of reads.
*/ */
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod(); DownsamplingMethod method = getDownsamplingMethod();
// Synchronize the method back into the collection so that it shows up when
// interrogating for the downsample method during command line recreation.
setDownsamplingMethod(method);
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
SAMDataSource dataSource = new SAMDataSource( return new SAMDataSource(
samReaderIDs, samReaderIDs,
genomeLocParser, genomeLocParser,
argCollection.useOriginalBaseQualities, argCollection.useOriginalBaseQualities,
@ -868,14 +765,12 @@ public class GenomeAnalysisEngine {
refReader, refReader,
argCollection.defaultBaseQualities, argCollection.defaultBaseQualities,
!argCollection.disableLowMemorySharding); !argCollection.disableLowMemorySharding);
return dataSource;
} }
/** /**
* Opens a reference sequence file paired with an index. Only public for testing purposes * Opens a reference sequence file paired with an index. Only public for testing purposes
* *
* @param refFile Handle to a reference sequence file. Non-null. * @param refFile Handle to a reference sequence file. Non-null.
* @return A thread-safe file wrapper.
*/ */
public void setReferenceDataSource(File refFile) { public void setReferenceDataSource(File refFile) {
this.referenceDataSource = new ReferenceDataSource(refFile); this.referenceDataSource = new ReferenceDataSource(refFile);
@ -929,6 +824,26 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader); return readsDataSource.getHeader(reader);
} }
/**
* Returns an ordered list of the unmerged SAM file headers known to this engine.
* @return list of header for each input SAM file, in command line order
*/
public List<SAMFileHeader> getSAMFileHeaders() {
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
for ( final SAMReaderID id : getReadsDataSource().getReaderIDs() ) {
headers.add(getReadsDataSource().getHeader(id));
}
return headers;
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return getReferenceDataSource().getReference().getSequenceDictionary();
}
/** /**
* Returns data source object encapsulating all essential info and handlers used to traverse * Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object. * reads; header merger, individual file readers etc can be accessed through the returned data source object.
@ -939,8 +854,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource; return this.readsDataSource;
} }
/** /**
* Sets the collection of GATK main application arguments. * Sets the collection of GATK main application arguments.
* *
@ -1027,140 +940,14 @@ public class GenomeAnalysisEngine {
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics(); return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
} }
public SampleDataSource getSampleMetadata() { // -------------------------------------------------------------------------------------
return this.sampleDataSource; //
} // code for working with Samples database
//
// -------------------------------------------------------------------------------------
/** public SampleDB getSampleDB() {
* Get a sample by its ID return this.sampleDB;
* If an alias is passed in, return the main sample object
* @param id sample id
* @return sample Object with this ID
*/
public Sample getSampleById(String id) {
return sampleDataSource.getSampleById(id);
}
/**
* Get the sample for a given read group
* Must first look up ID for read group
* @param readGroup of sample
* @return sample object with ID from the read group
*/
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
return sampleDataSource.getSampleByReadGroup(readGroup);
}
/**
* Get a sample for a given read
* Must first look up read group, and then sample ID for that read group
* @param read of sample
* @return sample object of this read
*/
public Sample getSampleByRead(SAMRecord read) {
return getSampleByReadGroup(read.getReadGroup());
}
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
return sampleDataSource.sampleCount();
}
/**
* Return all samples with a given family ID
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
* @param familyId family ID
* @return Samples with the given family ID
*/
public Set<Sample> getFamily(String familyId) {
return sampleDataSource.getFamily(familyId);
}
/**
* Returns all children of a given sample
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
* @param sample parent sample
* @return children of the given sample
*/
public Set<Sample> getChildren(Sample sample) {
return sampleDataSource.getChildren(sample);
}
/**
* Gets all the samples
* @return
*/
public Collection<Sample> getSamples() {
return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*
* @param sampleNameList List of sample names
* @return Corresponding set of samples
*/
public Set<Sample> getSamples(Collection<String> sampleNameList) {
return sampleDataSource.getSamples(sampleNameList);
}
/**
* Returns a set of samples that have any value (which could be null) for a given property
* @param key Property key
* @return Set of samples with the property
*/
public Set<Sample> getSamplesWithProperty(String key) {
return sampleDataSource.getSamplesWithProperty(key);
}
/**
* Returns a set of samples that have a property with a certain value
* Value must be a string for now - could add a similar method for matching any objects in the future
*
* @param key Property key
* @param value String property value
* @return Set of samples that match key and value
*/
public Set<Sample> getSamplesWithProperty(String key, String value) {
return sampleDataSource.getSamplesWithProperty(key, value);
}
/**
* Returns a set of sample objects for the sample names in a variant context
*
* @param context Any variant context
* @return a set of the sample objects
*/
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
Set<Sample> samples = new HashSet<Sample>();
for (String sampleName : context.getSampleNames()) {
samples.add(sampleDataSource.getOrCreateSample(sampleName));
}
return samples;
}
/**
* Returns all samples that were referenced in the SAM file
*/
public Set<Sample> getSAMFileSamples() {
return sampleDataSource.getSAMFileSamples();
}
/**
* Return a subcontext restricted to samples with a given property key/value
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
* @param context VariantContext to filter
* @param key property key
* @param value property value (must be string)
* @return subcontext
*/
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
return sampleDataSource.subContextFromSampleProperty(context, key, value);
} }
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) { public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.TreeMap;
/** /**
* Holds a bunch of basic information about the traversal. * Holds a bunch of basic information about the traversal.
@ -102,8 +103,12 @@ public class ReadMetrics implements Cloneable {
counter.put(filter.getClass(), c + 1L); counter.put(filter.getClass(), c + 1L);
} }
public Map<Class,Long> getCountsByFilter() { public Map<String,Long> getCountsByFilter() {
return Collections.unmodifiableMap(counter); final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
}
return sortedCounts;
} }
/** /**

View File

@ -29,13 +29,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/** /**
* @author ebanks * @author ebanks
* @version 1.0 * @version 1.0
*/ */
@Root
public class DbsnpArgumentCollection { public class DbsnpArgumentCollection {
/** /**

View File

@ -26,34 +26,26 @@
package org.broadinstitute.sting.gatk.arguments; package org.broadinstitute.sting.gatk.arguments;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.Persister;
import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle;
import java.io.File; import java.io.File;
import java.io.InputStream; import java.util.*;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* @author aaron * @author aaron
* @version 1.0 * @version 1.0
*/ */
@Root
public class GATKArgumentCollection { public class GATKArgumentCollection {
/* our version number */ /* our version number */
@ -64,58 +56,58 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() { public GATKArgumentCollection() {
} }
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required = false)
public Map<String, String> walkerArgs = new HashMap<String, String>(); public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults // parameters and their defaults
@ElementList(required = false)
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>(); public List<String> samFiles = new ArrayList<String>();
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
public List<File> sampleFiles = new ArrayList<File>();
@Element(required = false)
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null; public Integer readBufferSize = null;
@Element(required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@ElementList(required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
public List<String> readFilters = new ArrayList<String>(); public List<String> readFilters = new ArrayList<String>();
@ElementList(required = false) /**
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) * Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
public List<String> intervals = null; * One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
*/
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> intervals = null;
@ElementList(required = false) /**
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) * Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
public List<String> excludeIntervals = null; * One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
*/
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> excludeIntervals = null;
/**
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
*/
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
/**
* Should abutting (but not overlapping) intervals be treated as separate intervals?
*/
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@Element(required = false)
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null; public File referenceFile = null;
@Deprecated @Deprecated
@Hidden @Hidden
@ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false) @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>(); public ArrayList<String> RODBindings = new ArrayList<String>();
@Element(required = false)
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
public String RODToInterval = null;
@Element(required = false)
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false; public boolean nonDeterministicRandomSeed = false;
@ -128,22 +120,19 @@ public class GATKArgumentCollection {
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000; private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
@Element(required = false) @Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here", required = false)
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
public DownsampleType downsamplingType = null; public DownsampleType downsamplingType = null;
@Element(required = false)
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
public Double downsampleFraction = null; public Double downsampleFraction = null;
@Element(required = false)
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false) @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
public Integer downsampleCoverage = null; public Integer downsampleCoverage = null;
/** /**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify * Gets the downsampling method explicitly specified by the user. If the user didn't specify
* a default downsampling mechanism, return null. * a default downsampling mechanism, return the default.
* @return The explicitly specified downsampling mechanism, or null if none exists. * @return The explicitly specified downsampling mechanism, or the default if none exists.
*/ */
public DownsamplingMethod getDownsamplingMethod() { public DownsamplingMethod getDownsamplingMethod() {
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null) if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
@ -153,16 +142,26 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction); return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
} }
/**
* Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments.
* @param method The downsampling mechanism.
*/
public void setDownsamplingMethod(DownsamplingMethod method) {
if (method == null)
throw new IllegalArgumentException("method is null");
downsamplingType = method.type;
downsampleCoverage = method.toCoverage;
downsampleFraction = method.toFraction;
}
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// BAQ arguments // BAQ arguments
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false) @Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF; public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
@Element(required = false)
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false) @Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP; public double BAQGOP = BAQ.DEFAULT_GOP;
@ -171,7 +170,6 @@ public class GATKArgumentCollection {
// performance log arguments // performance log arguments
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false) @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
public File performanceLog = null; public File performanceLog = null;
@ -184,67 +182,117 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null); return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
} }
@Element(required = false)
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false; public Boolean useOriginalBaseQualities = false;
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
public byte defaultBaseQualities = -1; public byte defaultBaseQualities = -1;
@Element(required = false)
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Element(required = false)
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe; public ValidationExclusion.TYPE unsafe;
/** How many threads should be allocated to this analysis. */ @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis", required = false)
@Element(required = false)
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
public int numberOfThreads = 1; public int numberOfThreads = 1;
/** What rule should we use when merging intervals */ @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line", required = false)
@Element(required = false)
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@ElementList(required = false)
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null; public List<String> readGroupBlackList = null;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// distributed GATK arguments // PED (pedigree) support
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required=false)
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
@Hidden
public File processingTrackerFile = null;
@Element(required=false) /**
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false) * <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
@Hidden * processed in the GATK.</p>
public boolean restartProcessingTracker = false; *
* <ul>
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
* </ul>
*
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
*
* <ul>
* <li>Family ID</li>
* <li>Individual ID</li>
* <li>Paternal ID</li>
* <li>Maternal ID</li>
* <li>Sex (1=male; 2=female; other=unknown)</li>
* <li>Phenotype</li>
* </ul>
*
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: GATK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
*
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
*
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.</p>
*
* <p>Affection status should be coded:</p>
*
* <ul>
* <li>-9 missing</li>
* <li>0 missing</li>
* <li>1 unaffected</li>
* <li>2 affected</li>
* </ul>
*
* <p>If any value outside of -9,0,1,2 is detected than the samples are assumed
* to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
* represents the missing value.</p>
*
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
*
* <p>For example, here are two individuals (one row = one person):</p>
*
* <pre>
* FAM001 1 0 0 1 2
* FAM001 2 0 0 1 2
* </pre>
*
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
*
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
* data should clearly indicate so in their arguments and will throw errors if required pedigree
* information is missing.</p>
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List<File> pedigreeFiles = Collections.emptyList();
@Element(required=false) /**
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false) * Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
@Hidden * valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
public File processingTrackerStatusFile = null; * as -ped supports
*/
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
public List<String> pedigreeStrings = Collections.emptyList();
@Element(required=false) /**
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false) * How strict should we be in parsing the PED files?
@Hidden */
public int processTrackerID = -1; @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
//
// BAM indexing and sharding arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false) @Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
@Hidden @Hidden
public boolean allowIntervalsWithUnindexedBAM = false; public boolean allowIntervalsWithUnindexedBAM = false;
@Element(required = false) @Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality",required=false)
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality.",required=false)
public boolean disableLowMemorySharding = false; public boolean disableLowMemorySharding = false;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -253,69 +301,6 @@ public class GATKArgumentCollection {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the file to write to
*/
public static void marshal(GATKArgumentCollection collection, String outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File result = new File(outputFile);
try {
serializer.write(collection, result);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the stream to write to
*/
public static void marshal(GATKArgumentCollection collection, PrintStream outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
serializer.write(collection, outputFile);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param filename the filename to marshal from
*/
public static GATKArgumentCollection unmarshal(String filename) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File source = new File(filename);
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + filename, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param file the inputstream to marshal from
*/
public static GATKArgumentCollection unmarshal(InputStream file) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, file);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + file.toString(), e);
}
}
/** /**
* test equality between two arg collections. This function defines the statement: * test equality between two arg collections. This function defines the statement:
* "not fun to write" * "not fun to write"
@ -363,7 +348,7 @@ public class GATKArgumentCollection {
if (!other.referenceFile.equals(this.referenceFile)) { if (!other.referenceFile.equals(this.referenceFile)) {
return false; return false;
} }
if (!other.intervals.equals(this.intervals)) { if ((other.intervals == null && this.intervals != null) || !other.intervals.equals(this.intervals)) {
return false; return false;
} }
if (!other.excludeIntervals.equals(this.excludeIntervals)) { if (!other.excludeIntervals.equals(this.excludeIntervals)) {
@ -386,39 +371,21 @@ public class GATKArgumentCollection {
if (other.intervalMerging != this.intervalMerging) { if (other.intervalMerging != this.intervalMerging) {
return false; return false;
} }
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
if (other.phoneHomeType != this.phoneHomeType) { if (other.phoneHomeType != this.phoneHomeType) {
return false; return false;
} }
if (BTIMergeRule != other.BTIMergeRule) if (intervalSetRule != other.intervalSetRule)
return false; return false;
if ( BAQMode != other.BAQMode) return false; if ( BAQMode != other.BAQMode ) return false;
if ( BAQGOP != other.BAQGOP ) return false; if ( BAQGOP != other.BAQGOP ) return false;
if ((other.performanceLog == null && this.performanceLog != null) || if ((other.performanceLog == null && this.performanceLog != null) ||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog))) (other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false; return false;
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
return false;
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
return false;
if ( restartProcessingTracker != other.restartProcessingTracker )
return false;
if ( processTrackerID != other.processTrackerID )
return false;
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM) if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false; return false;

View File

@ -28,13 +28,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/** /**
* @author ebanks * @author ebanks
* @version 1.0 * @version 1.0
*/ */
@Root
public class StandardVariantContextInputArgumentCollection { public class StandardVariantContextInputArgumentCollection {
/** /**

View File

@ -37,7 +37,6 @@ public class ValidationExclusion {
public enum TYPE { public enum TYPE {
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard
ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.contexts; package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List; import java.util.List;
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
*/ */
@Deprecated @Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory //todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); } public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
/** /**
* Are there any reads associated with this locus? * Are there any reads associated with this locus?
@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return * @return
*/ */
public boolean hasReads() { public boolean hasReads() {
return basePileup != null && basePileup.size() > 0 ; return basePileup != null && basePileup.getNumberOfElements() > 0 ;
} }
/** /**
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return * @return
*/ */
public int size() { public int size() {
return basePileup.size(); return basePileup.getNumberOfElements();
} }
/** /**

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.contexts; package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
return splitContextBySampleName(context, null); return splitContextBySampleName(context, null);
} }
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
m.put(new Sample(entry.getKey()), entry.getValue());
}
return m;
}
/** /**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
* of sample object. * of sample object.
@ -97,11 +88,11 @@ public class AlignmentContextUtils {
GenomeLoc loc = context.getLocation(); GenomeLoc loc = context.getLocation();
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>(); HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
for(String sample: context.getPileup().getSampleNames()) { for(String sample: context.getPileup().getSamples()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample); ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context. // Don't add empty pileups to the split context.
if(pileupBySample.size() == 0) if(pileupBySample.getNumberOfElements() == 0)
continue; continue;
if(sample != null) if(sample != null)

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
* @param site Site at which to create the blank locus context. * @param site Site at which to create the blank locus context.
* @return empty context. * @return empty context.
*/ */
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList(); private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList(); private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
private AlignmentContext createEmptyLocus( GenomeLoc site ) { private AlignmentContext createEmptyLocus( GenomeLoc site ) {
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS)); return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));

View File

@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/ */
public FilePointer next() { public FilePointer next() {
FilePointer current = wrappedIterator.next(); FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0) while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next()); current = current.combine(parser,wrappedIterator.next());
return current; return current;
} }

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File; import java.io.File;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
@ -57,6 +58,8 @@ import java.util.*;
* Converts shards to SAM iterators over the specified region * Converts shards to SAM iterators over the specified region
*/ */
public class SAMDataSource { public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
/** Backing support for reads. */ /** Backing support for reads. */
protected final ReadProperties readProperties; protected final ReadProperties readProperties;
@ -235,6 +238,12 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values()) { for(SAMFileReader reader: readers.values()) {
// Get the sort order, forcing it to coordinate if unsorted. // Get the sort order, forcing it to coordinate if unsorted.
SAMFileHeader header = reader.getFileHeader(); SAMFileHeader header = reader.getFileHeader();
if ( header.getReadGroups().isEmpty() ) {
throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile,
"SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups");
}
SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate; SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate;
// Validate that all input files are sorted in the same order. // Validate that all input files are sorted in the same order.
@ -638,7 +647,9 @@ public class SAMDataSource {
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
byte defaultBaseQualities) { byte defaultBaseQualities) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
// only wrap if we are replacing the original qualitiies or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away // as there is no reason to sort something that we will end of throwing away
@ -750,6 +761,7 @@ public class SAMDataSource {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) { for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile); SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.setSAMRecordFactory(factory);
reader.enableFileSource(true); reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false); reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding) if(!enableLowMemorySharding)

View File

@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
// intervals // intervals
GenomeLocSortedSet intervalSortedSet = null; GenomeLocSortedSet intervalSortedSet = null;
if(intervals != null) if(intervals != null)
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL); intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
else { else {
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser); intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences()) for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 2:09:16 PM
*/
/**
 * Bean describing one property definition from a sample file: a property name
 * together with the array of values that property is allowed to take.
 * Populated reflectively by the YAML loader, so fields have default visibility
 * and a no-arg constructor is implied.
 */
public class PropertyDefinition {

    // name of the property being constrained
    String property;
    // the permitted values for that property
    String[] values;

    /** @return the property name, or null if not yet set */
    public String getProperty() {
        return property;
    }

    /** @param property the property name to record */
    public void setProperty(String property) {
        this.property = property;
    }

    /** @return the allowed values, or null if not yet set */
    public String[] getValues() {
        return values;
    }

    /** @param values the allowed values to record */
    public void setValues(String[] values) {
        this.values = values;
    }
}

View File

@ -1,203 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:31:38 PM
*/
/**
 * Represents a single sample: a unique id plus free-form properties
 * (e.g. "gender", "population") and named relationships to other samples
 * (e.g. "mother", "father").
 */
public class Sample implements java.io.Serializable {

    private final String id;

    private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file

    private boolean hasSAMFileEntry = false; // true if this sample has an entry in the SAM file

    private HashMap<String, Object> properties = new HashMap<String, Object>();

    private HashMap<String, Sample> relationships = new HashMap<String, Sample>();

    /** Gender of a sample, stored under the "gender" property key. */
    public enum Gender {
        MALE,
        FEMALE,
        UNKNOWN
    }

    /**
     * @param id sample id; a null id is deliberately tolerated (SampleDataSource
     *           registers a special "null" sample)
     */
    public Sample(String id) {
        this.id = id;
    }

    /** @return this sample's id (may be null) */
    public String getId() {
        return this.id;
    }

    /** @return the live (mutable) properties map */
    public Map<String, Object> getProperties() {
        return properties;
    }

    /** Replaces the entire properties map; the argument must be a HashMap at runtime. */
    public void setProperties(Map<String, Object> properties) {
        this.properties = (HashMap) properties;
    }

    /** @return an unmodifiable view of this sample's relationships */
    public Map<String,Sample> getRelationships() {
        return Collections.unmodifiableMap(this.relationships);
    }

    public void setSampleFileEntry(boolean value) {
        this.hasSampleFileEntry = value;
    }

    public boolean hasSAMFileEntry() {
        return this.hasSAMFileEntry;
    }

    public void setSAMFileEntry(boolean value) {
        this.hasSAMFileEntry = value;
    }

    public boolean hasSampleFileEntry() {
        return this.hasSampleFileEntry;
    }

    /**
     * Get one property
     * @param key key of property
     * @return value of property as generic object, or null if unset
     */
    public Object getProperty(String key) {
        return properties.get(key);
    }

    /**
     * Set a property. If the property already exists, it is overwritten.
     * @param key key of property; must not collide with a relationship key
     * @param value object to be stored; "gender" must be a Sample.Gender,
     *              "population" must be a String
     */
    public void setProperty(String key, Object value) {
        if (relationships.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        if (key.equals("gender") && value.getClass() != Gender.class) {
            throw new StingException("'gender' property must be of type Sample.Gender");
        }
        if (key.equals("population") && value.getClass() != String.class) {
            throw new StingException("'population' property must be of type String");
        }
        properties.put(key, value);
    }

    /**
     * Get one relationship
     * @param key of relationship
     * @return Sample object that this relationship points to, or null if unset
     */
    public Sample getRelationship(String key) {
        return relationships.get(key);
    }

    /**
     * Set one relationship. If already set, it is overwritten.
     * @param key key of the relationship; must not collide with a property key
     * @param value Sample object this relationship points to
     */
    public void setRelationship(String key, Sample value) {
        if (properties.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        relationships.put(key, value);
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return getRelationship("mother");
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return getRelationship("father");
    }

    /**
     * Get gender of the sample
     * @return property of key "gender" - must be of type Gender
     */
    public Gender getGender() {
        return (Gender) properties.get("gender");
    }

    public String getPopulation() {
        return (String) properties.get("population");
    }

    public String getFamilyId() {
        return (String) properties.get("familyId");
    }

    /**
     * @return True if sample is male, false if female, unknown, or null
     */
    public boolean isMale() {
        return properties.get("gender") == Gender.MALE;
    }

    /**
     * @return True if sample is female, false if male, unknown or null
     */
    public boolean isFemale() {
        // BUGFIX: previously compared against Gender.MALE, making isFemale() a duplicate of isMale()
        return properties.get("gender") == Gender.FEMALE;
    }

    /**
     * @param key property key
     * @return true if sample has this property (even if its value is null)
     */
    public boolean hasProperty(String key) {
        return properties.containsKey(key);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Sample sample = (Sample) o;

        if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
        if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
        if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
        if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
        if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        // hash on id only, consistent with equals for the common case; null id hashes like ""
        return id != null ? id.hashCode() : "".hashCode();
    }
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 5:13:46 PM
*/
public class SampleAlias {
String mainId;
String[] otherIds;
public String getMainId() {
return mainId;
}
public void setMainId(String mainId) {
this.mainId = mainId;
}
public String[] getOtherIds() {
return otherIds;
}
public void setOtherIds(String[] otherIds) {
this.otherIds = otherIds;
}
}

View File

@ -1,590 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
*
*/
public class SampleDataSource {
/**
* SAMFileHeader that has been created for this analysis.
*/
private SAMFileHeader header;
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
*/
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
/**
* Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
* datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
*
* This maps ID => primary ID for all samples ID strings - both primary IDs and aliases.
*/
private HashMap<String, String> sampleAliases = new HashMap<String, String>();
/**
* While loading sample files, we must be aware of "special" properties and relationships that are always allowed
*/
public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};
public static final String[] specialRelationships = new String[] {"mother", "father"};
/**
 * Constructor takes both a SAM header and sample files because the two must be integrated:
 * every sample named in the SAM header gets a placeholder Sample object first, then each
 * sample file is layered on top in command-line order.
 * @param header SAMFileHeader that has been created for this analysis
 * @param sampleFiles Sample files that were included on the command line (may be null)
 */
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
    this();     // installs the special "null" sample entry
    this.header = header;

    // create empty sample object for each sample referenced in the SAM header
    for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
        if (!hasSample(sampleName)) {
            Sample newSample = new Sample(sampleName);
            newSample.setSAMFileEntry(true);    // mark the sample as originating from the SAM header
            samples.put(sampleName, newSample);
        }
    }

    // add files consecutively, in the order given
    if (sampleFiles != null) {
        for (File file : sampleFiles) {
            addFile(file);
        }
    }
}
/**
 * Creates a data source containing only a special placeholder Sample registered
 * under the null key with a null id.
 */
public SampleDataSource() {
    samples.put(null, new Sample(null));
}
/**
 * Ensures every sample named in the given SAM header is registered, creating a
 * placeholder Sample object for each one not already present.
 * @param header SAM file header whose read-group samples should be registered
 */
public void addSamplesFromSAMHeader(SAMFileHeader header) {
    for (String name : SampleUtils.getSAMFileSamples(header)) {
        if (hasSample(name))
            continue;
        Sample created = new Sample(name);
        created.setSAMFileEntry(true);  // mark the sample as originating from the SAM header
        samples.put(name, created);
    }
}
/**
 * Parse one YAML sample file and integrate it with the samples already loaded.
 * Fails quickly if any error is found in the file: unknown properties or
 * relationships (when the file restricts them), disallowed property values,
 * or values that conflict with data loaded earlier.
 * @param sampleFile YAML sample file to load
 */
public void addFile(File sampleFile) {
    BufferedReader reader;
    try {
        reader = new BufferedReader(new FileReader(sampleFile));
    }
    catch (IOException e) {
        throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
    }

    // set up YAML reader - a "Constructor" creates java object from YAML and "Loader" loads the file
    Constructor con = new Constructor(SampleFileParser.class);
    TypeDescription desc = new TypeDescription(SampleFileParser.class);
    desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
    desc.putListPropertyType("sampleAliases", SampleAlias.class);
    con.addTypeDescription(desc);
    Yaml yaml = new Yaml(con);

    // SampleFileParser stores an object representation of a sample file - this is what we'll parse
    SampleFileParser parser;
    try {
        parser = (SampleFileParser) yaml.load(reader);
    }
    catch (Exception e) {
        throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
    }
    finally {
        // BUGFIX: the reader was previously never closed, leaking a file handle per sample file
        try { reader.close(); } catch (IOException ignored) { /* best effort; contents already read */ }
    }

    // check to see which validation options were built into the file
    boolean restrictProperties = parser.getAllowedProperties() != null;
    boolean restrictRelationships = parser.getAllowedRelationships() != null;
    boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;

    // propertyValues stores the values that are allowed for a given property
    HashMap<String, HashSet> propertyValues = null;
    if (restrictPropertyValues) {
        propertyValues = new HashMap<String, HashSet>();
        for (PropertyDefinition def : parser.getPropertyDefinitions()) {
            propertyValues.put(def.getProperty(), new HashSet<String>(Arrays.asList(def.getValues())));
        }
    }

    // make sure the aliases are valid
    validateAliases(parser);

    // loop through each sample in the file - a SampleParser stores an object that will become a Sample
    for (SampleParser sampleParser : parser.getSamples()) {
        try {
            // step 1: add the sample if it doesn't already exist
            Sample sample = getSampleById(sampleParser.getId());
            if (sample == null) {
                sample = new Sample(sampleParser.getId());
            }
            addSample(sample);
            sample.setSampleFileEntry(true);

            // step 2: add the properties
            if (sampleParser.getProperties() != null) {
                for (String property : sampleParser.getProperties().keySet()) {
                    Object newValue = sampleParser.getProperties().get(property);

                    // check that property is allowed
                    if (restrictProperties && !isPropertyValid(property, parser.getAllowedProperties())) {
                        throw new StingException(property + " is an invalid property. It is not included in the list " +
                                "of allowed properties.");
                    }

                    // next check that the value is allowed
                    if (restrictPropertyValues && !isValueAllowed(property, newValue, propertyValues)) {
                        throw new StingException("The value of property '" + property + "' is invalid. " +
                                "It is not included in the list of allowed values for this property.");
                    }

                    // next check that there isn't already a conflicting property there.
                    // BUGFIX: values were previously compared with reference identity (!=),
                    // which rejected logically equal values loaded from different files.
                    Object oldValue = sample.getProperty(property);
                    if (oldValue != null && !oldValue.equals(newValue)) {
                        throw new StingException(property + " is a conflicting property!");
                    }

                    // checks are passed - now add the property!
                    saveProperty(sample, property, newValue);
                }
            }

            // step 3: add the relationships
            if (sampleParser.getRelationships() != null) {
                for (String relationship : sampleParser.getRelationships().keySet()) {
                    String relativeId = sampleParser.getRelationships().get(relationship);
                    if (relativeId == null) {
                        throw new StingException("The relationship cannot be null");
                    }

                    // first check that it's not invalid
                    if (restrictRelationships && !isRelationshipValid(relationship, parser.getAllowedRelationships())) {
                        throw new StingException(relationship + " is an invalid relationship");
                    }

                    // next check that there isn't already a conflicting relationship there.
                    // BUGFIX: the conflict check previously looked the relationship key up in the
                    // *properties* map and compared ids by reference; compare the stored relative's
                    // id against relativeId with equals() instead.
                    Sample existingRelative = sample.getRelationship(relationship);
                    if (existingRelative != null) {
                        if (!relativeId.equals(existingRelative.getId())) {
                            throw new StingException(relationship + " is a conflicting relationship!");
                        }
                        // the relationship is already set - and consistent - no need to continue
                        continue;
                    }

                    // checks are passed - now save the relationship
                    saveRelationship(sample, relationship, relativeId);
                }
            }
        } catch (Exception e) {
            throw new StingException("An error occurred while loading this sample from the sample file: " +
                    sampleParser.getId(), e);
        }
    }
}
/**
 * Checks whether a property value is permitted by the file's value definitions.
 * @param key property name
 * @param value candidate value (must be non-null; callers pass parsed YAML values)
 * @param valuesList map from property name to the set of allowed string values
 * @return true if the property is unconstrained, or the value is an allowed String
 */
private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
    // properties with no explicit definition accept any value
    if (!valuesList.containsKey(key)) {
        return true;
    }
    // constrained properties must hold Strings...
    if (value.getClass() != String.class) {
        return false;
    }
    // ...and the String must be one of the declared values
    return valuesList.get(key).contains(value);
}
/**
 * Makes sure the alias declarations in a sample file are coherent:
 * 1) no string is used as both a main ID and an alias;
 * 2) no alias is used more than once.
 * @param parser parsed sample file whose aliases should be validated
 */
private void validateAliases(SampleFileParser parser) {
    if (parser.getSampleAliases() == null)
        return;     // nothing declared, nothing to validate

    HashSet<String> primaryIds = new HashSet<String>();
    HashSet<String> aliasIds = new HashSet<String>();
    for (SampleAlias alias : parser.getSampleAliases()) {
        primaryIds.add(alias.getMainId());
        for (String otherId : alias.getOtherIds()) {
            if (primaryIds.contains(otherId))
                throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
                        "be both a main ID and an other ID", otherId));
            // add() returns false when the alias was already present
            if (!aliasIds.add(otherId))
                throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
                        "alias more than once.", otherId));
        }
    }
}
/**
 * A property is valid when it is one of the always-allowed special properties
 * or appears in the file's allowed-properties list.
 * @param property property name to check
 * @param allowedProperties the file's declared allowed properties
 * @return true if the property may be used
 */
private boolean isPropertyValid(String property, String[] allowedProperties) {
    for (String[] candidates : new String[][] { specialProperties, allowedProperties }) {
        for (String candidate : candidates) {
            if (property.equals(candidate))
                return true;
        }
    }
    return false;
}
/**
 * A relationship is valid when it is one of the always-allowed special
 * relationships or appears in the file's allowed-relationships list.
 * @param relationship relationship name to check
 * @param allowedRelationships the file's declared allowed relationships
 * @return true if the relationship may be used
 */
private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
    for (String[] candidates : new String[][] { specialRelationships, allowedRelationships }) {
        for (String candidate : candidates) {
            if (relationship.equals(candidate))
                return true;
        }
    }
    return false;
}
/**
 * Saves a property on a sample, first converting it to the correct type.
 * Currently only "gender" needs conversion: its YAML string form becomes a
 * Sample.Gender value.
 * @param sample sample to modify
 * @param key property key
 * @param value property value, as read from YAML parser (may be null)
 */
private void saveProperty(Sample sample, String key, Object value) {
    // convert gender to the right type, if it was stored as a String.
    // BUGFIX: the original dereferenced value before its null check
    // (((String) value).toLowerCase() threw NPE on a null gender), although the
    // trailing "else if (value != null)" showed null was meant to be tolerated.
    if (key.equals("gender") && value != null) {
        String gender = ((String) value).toLowerCase();
        if (gender.equals("male")) {
            value = Sample.Gender.MALE;
        }
        else if (gender.equals("female")) {
            value = Sample.Gender.FEMALE;
        }
        else if (gender.equals("unknown")) {
            value = Sample.Gender.UNKNOWN;
        }
        else {
            throw new StingException("'gender' property must be male, female, or unknown.");
        }
    }
    try {
        sample.setProperty(key, value);
    }
    catch (Exception e) {
        throw new StingException("Could not save property " + key, e);
    }
}
/**
 * Saves a relationship on a sample. The stored value is a reference to the
 * relative's Sample object, which is created and registered on demand if the
 * relative has not been seen yet.
 * @param sample sample on which the relationship is stored
 * @param key relationship key
 * @param relativeId sample ID string of the relative
 */
private void saveRelationship(Sample sample, String key, String relativeId) {
    // getOrCreateSample() already implements the lookup-or-create-and-register
    // logic this method previously duplicated inline
    sample.setRelationship(key, getOrCreateSample(relativeId));
}
/**
 * Filter a sample name in case it is an alias.
 * @param sampleId ID to be filtered
 * @return the main sample ID that stores data for this alias, or the given
 *         ID unchanged when it is not an alias
 */
private String aliasFilter(String sampleId) {
    if (sampleAliases.containsKey(sampleId))
        return sampleAliases.get(sampleId);
    return sampleId;
}
/**
 * Add a sample to the collection, keyed by its own ID.
 * @param sample sample to be added
 */
private void addSample(Sample sample) {
    final String id = sample.getId();
    samples.put(id, sample);
}
/**
 * Check whether a sample with this ID exists.
 * Note that this returns true when the name passed in is an alias of an
 * existing sample.
 * @param id ID (or alias) of the sample to be checked
 * @return true if the sample exists; false if not
 */
public boolean hasSample(String id) {
    final Sample found = samples.get(aliasFilter(id));
    return found != null;
}
/**
 * Get a sample by its ID.
 * If an alias is passed in, the main sample object is returned.
 * @param id sample ID or alias
 * @return sample object with this ID, or null if none exists
 */
public Sample getSampleById(String id) {
    final String mainId = aliasFilter(id);
    return samples.get(mainId);
}
/**
 * Get the sample for a given read group.
 * Looks up the sample name recorded on the read group, then resolves it
 * through the normal ID lookup.
 * @param readGroup read group whose sample is wanted
 * @return sample object with the ID taken from the read group
 */
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
    return getSampleById(readGroup.getSample());
}
/**
 * Get the sample for a given read.
 * Resolves the read's read group first, then the sample registered for
 * that read group.
 * @param read read whose sample is wanted
 * @return sample object of this read
 */
public Sample getSampleByRead(SAMRecord read) {
    final SAMReadGroupRecord group = read.getReadGroup();
    return getSampleByReadGroup(group);
}
/**
 * Get the number of sample objects currently stored in this data source.
 * @return size of the samples map
 */
public int sampleCount() {
    return samples.size();
}
/**
 * Return all samples with a given family ID.
 * Note that this isn't terribly efficient (linear scan over all samples) -
 * it may be worth adding a dedicated family ID data structure for this.
 * @param familyId family ID to look up
 * @return set of every sample whose family ID matches
 */
public Set<Sample> getFamily(String familyId) {
    final HashSet<Sample> familyMembers = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        final String candidateFamily = candidate.getFamilyId();
        // samples without a family ID never match
        if (candidateFamily != null && candidateFamily.equals(familyId))
            familyMembers.add(candidate);
    }
    return familyMembers;
}
/**
 * Returns all children of a given sample.
 * See the note on the efficiency of getFamily() - since this depends on
 * getFamily(), it is also linear in the number of samples.
 * @param sample putative parent
 * @return set of family members whose mother or father is this sample
 */
public Set<Sample> getChildren(Sample sample) {
    final HashSet<Sample> children = new HashSet<Sample>();
    for (final Sample member : getFamily(sample.getFamilyId())) {
        // parent links are compared by reference identity, as in the original code
        final boolean isChild = member.getMother() == sample || member.getFather() == sample;
        if (isChild)
            children.add(member);
    }
    return children;
}
/**
 * Get every sample known to this data source.
 * @return a freshly-allocated set containing all sample objects
 */
public Set<Sample> getSamples() {
    return new HashSet<Sample>(samples.values());
}
/**
 * Takes a collection of sample names and returns their corresponding sample objects.
 * Note that, since a set is returned, if you pass in a list with duplicate names
 * there will not be any duplicates in the returned set.
 * @param sampleNameList collection of sample names
 * @return corresponding set of samples
 */
public Set<Sample> getSamples(Collection<String> sampleNameList) {
    final HashSet<Sample> result = new HashSet<Sample>();
    for (final String name : sampleNameList) {
        try {
            result.add(getSampleById(name));
        }
        catch (Exception e) {
            throw new StingException("Could not get sample with the following ID: " + name, e);
        }
    }
    return result;
}
/**
 * Returns the set of samples that have any value (which could be null) for
 * a given property.
 * @param key property key
 * @return set of samples carrying the property
 */
public Set<Sample> getSamplesWithProperty(String key) {
    final HashSet<Sample> matching = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        if (candidate.hasProperty(key))
            matching.add(candidate);
    }
    return matching;
}
/**
 * Returns a set of samples that have a property with a certain value.
 * Value must be a string for now - could add a similar method for matching
 * any objects in the future.
 *
 * @param key property key
 * @param value String property value; must not be null
 * @return set of samples that match key and value
 */
public Set<Sample> getSamplesWithProperty(String key, String value) {
    // Build a new set instead of removing elements from the set being
    // iterated: the previous version called toReturn.remove(s) inside a
    // for-each over toReturn, which throws ConcurrentModificationException
    // as soon as one sample has the property with a non-matching value.
    Set<Sample> toReturn = new HashSet<Sample>();
    for (Sample s : getSamplesWithProperty(key)) {
        // value.equals(...) also tolerates samples that store a null property
        // value (documented as possible by getSamplesWithProperty(key)),
        // which previously caused a NullPointerException
        if (value.equals(s.getProperty(key)))
            toReturn.add(s);
    }
    return toReturn;
}
/**
 * Get the sample with the given ID, creating and registering a new Sample
 * object if none exists yet.
 * @param id sample ID (or alias) to resolve
 * @return the existing or newly-created sample
 */
public Sample getOrCreateSample(String id) {
    final Sample existing = getSampleById(id);
    if (existing != null)
        return existing;

    final Sample created = new Sample(id);
    addSample(created);
    return created;
}
/**
 * Returns all samples that were referenced in the SAM file.
 * @return set of samples with a SAM file entry
 */
public Set<Sample> getSAMFileSamples() {
    final Set<Sample> fromSamFile = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        if (candidate.hasSAMFileEntry())
            fromSamFile.add(candidate);
    }
    return fromSamFile;
}
/**
 * Returns a set of sample objects for the sample names in a variant context.
 * Sample objects are created and registered on the fly for names not seen before.
 * @param context any variant context
 * @return a set of the sample objects
 */
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
    final Set<Sample> contextSamples = new HashSet<Sample>();
    for (final String sampleName : context.getSampleNames())
        contextSamples.add(getOrCreateSample(sampleName));
    return contextSamples;
}
/**
 * Return a subcontext restricted to samples with a given property key/value.
 * Gets the sample names matching key/value and relies on
 * VariantContext.subContextFromGenotypes for the filtering.
 * NOTE(review): the lookup reads the samples map directly, so aliases are NOT
 * resolved here (unlike getSampleById) - confirm that is intended.
 * @param context VariantContext to filter
 * @param key property key
 * @param value property value (must be string)
 * @return subcontext containing only the matching samples' genotypes
 */
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
    final Set<String> matchingNames = new HashSet<String>();
    for (final String sampleName : context.getSampleNames()) {
        final Sample s = samples.get(sampleName);
        // unknown samples and samples lacking the property are skipped
        final boolean matches = s != null && s.hasProperty(key) && s.getProperty(key).equals(value);
        if (matches)
            matchingNames.add(sampleName);
    }
    final Map<String, Genotype> genotypes = context.getGenotypes(matchingNames);
    return context.subContextFromGenotypes(genotypes.values());
}
/**
 * Creates a SampleDataSource backed by a fresh, empty SAM file header and no
 * sample file.
 * @return an empty sample data source
 */
public static SampleDataSource createEmptyDataSource() {
    return new SampleDataSource(new SAMFileHeader(), null);
}
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
 * Bean mapping the top-level structure of a YAML sample file: the sample
 * entries themselves plus the declared aliases, the allowed property keys,
 * the allowed relationship keys, and the property definitions.
 *
 * User: brett
 * Date: Aug 12, 2010
 * Time: 1:30:44 PM
 */
public class SampleFileParser {

    private SampleAlias[] sampleAliases;
    private String[] allowedProperties;
    private String[] allowedRelationships;
    private PropertyDefinition[] propertyDefinitions;
    private SampleParser[] samples;

    /** No-arg constructor required by the YAML parser. */
    public SampleFileParser() {
    }

    public SampleAlias[] getSampleAliases() {
        return sampleAliases;
    }

    public void setSampleAliases(SampleAlias[] sampleAliases) {
        this.sampleAliases = sampleAliases;
    }

    public String[] getAllowedProperties() {
        return allowedProperties;
    }

    public void setAllowedProperties(String[] allowedProperties) {
        this.allowedProperties = allowedProperties;
    }

    public String[] getAllowedRelationships() {
        return allowedRelationships;
    }

    public void setAllowedRelationships(String[] allowedRelationships) {
        this.allowedRelationships = allowedRelationships;
    }

    public PropertyDefinition[] getPropertyDefinitions() {
        return propertyDefinitions;
    }

    public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
        this.propertyDefinitions = propertyDefinitions;
    }

    public SampleParser[] getSamples() {
        return samples;
    }

    public void setSamples(SampleParser[] samples) {
        this.samples = samples;
    }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import java.util.HashMap;
/**
 * Bean mapping a single sample entry in a YAML sample file: the sample's ID
 * plus free-form property and relationship maps.
 *
 * User: brett
 * Date: Aug 13, 2010
 * Time: 2:09:43 PM
 */
public class SampleParser {

    private String id;
    private HashMap<String, Object> properties;
    private HashMap<String, String> relationships;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }

    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }
}

View File

@ -85,12 +85,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/ */
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) { protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods); super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
if (engine.getArguments().processingTrackerFile != null) {
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
}
} }
public Object execute( Walker walker, ShardStrategy shardStrategy ) { public Object execute( Walker walker, ShardStrategy shardStrategy ) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.Collection; import java.util.Collection;
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary(); traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) { if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) { for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods); ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -62,16 +62,17 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit(); Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here? WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
ShardDataProvider dataProvider = null; microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
for(WindowMaker.WindowMakerIterator iterator: windowMaker) { for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); final ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
dataProvider.close(); dataProvider.close();
} }
if (dataProvider != null) dataProvider.close();
windowMaker.close(); windowMaker.close();
outputMergeTask = outputTracker.closeStorage(); outputMergeTask = outputTracker.closeStorage();

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals. * the given intervals.
* @param iterator The data source for this window. * @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse. * @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with * @param sampleNames The complete set of sample names in the reads in shard
*/ */
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) { public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
this.sourceInfo = shard.getReadProperties(); this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator; this.readIterator = iterator;
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null; this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
} }
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() { public Iterator<WindowMakerIterator> iterator() {
return this; return this;
} }

View File

@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
/** /**
* Filter out malformed reads. * Filter out malformed reads.
@ -37,14 +39,25 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
*/ */
public class MalformedReadFilter extends ReadFilter { public class MalformedReadFilter extends ReadFilter {
private SAMFileHeader header; private SAMFileHeader header;
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
boolean filterMismatchingBaseAndQuals = false;
@Override @Override
public void initialize(GenomeAnalysisEngine engine) { public void initialize(GenomeAnalysisEngine engine) {
this.header = engine.getSAMFileHeader(); this.header = engine.getSAMFileHeader();
} }
public boolean filterOut(SAMRecord read) { public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) || // slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided
if (!checkMismatchingBasesAndQuals(read)) {
if (!filterMismatchingBaseAndQuals)
throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals");
else
return true;
}
return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) || !checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(this.header,read) || !checkAlignmentDisagreesWithHeader(this.header,read) ||
!checkCigarDisagreesWithAlignment(read); !checkCigarDisagreesWithAlignment(read);
@ -108,4 +121,13 @@ public class MalformedReadFilter extends ReadFilter {
return false; return false;
return true; return true;
} }
/**
* Check if the read has the same number of bases and base qualities
* @param read the read to validate
* @return true if they have the same number. False otherwise.
*/
private static boolean checkMismatchingBasesAndQuals(SAMRecord read) {
return (read.getReadLength() == read.getBaseQualities().length);
}
} }

View File

@ -0,0 +1,23 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 9/19/11
* Time: 4:09 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadNameFilter extends ReadFilter {
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
private String readName;
public boolean filterOut(final SAMRecord rec) {
return ! rec.getReadName().equals(readName);
}
}

View File

@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) { else if ( stub.getOutputStream() != null ) {
this.file = null; this.file = null;
this.stream = stub.getOutputStream(); this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes()); writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
} }
else else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
} }
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it. // The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes()); return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
} }

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs; package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed; return isCompressed;
} }
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
}
/** /**
* Should we tell the VCF writer not to write genotypes? * Should we tell the VCF writer not to write genotypes?
* @return true if the writer should not write genotypes. * @return true if the writer should not write genotypes.

View File

@ -35,26 +35,23 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler; import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*; import java.util.*;
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */ /** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
public class LocusIteratorByState extends LocusIterator { public class LocusIteratorByState extends LocusIterator {
// private static long discarded_bases = 0L;
// private static long observed_bases = 0L;
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class); private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs. * Used to create new GenomeLocs.
*/ */
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final ArrayList<Sample> samples; private final ArrayList<String> samples;
private final ReadStateManager readStates; private final ReadStateManager readStates;
static private class SAMRecordState { static private class SAMRecordState {
@ -278,15 +275,27 @@ public class LocusIteratorByState extends LocusIterator {
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) { public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
this.readInfo = readInformation; this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// get the list of samples // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
this.samples = new ArrayList<Sample>(sampleData.getSamples()); // there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod()); throw new IllegalArgumentException("samples list must not be empty");
}
}
/**
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
* for the system.
*/
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
List<String> samples = new ArrayList<String>();
samples.add(null);
return samples;
} }
public Iterator<AlignmentContext> iterator() { public Iterator<AlignmentContext> iterator() {
@ -303,19 +312,6 @@ public class LocusIteratorByState extends LocusIterator {
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r); //if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
} }
public void printState() {
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
}
}
private GenomeLoc getLocation() { private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
} }
@ -355,14 +351,14 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base // In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position. // and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) { if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>(); Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
// get current location on the reference and decrement it by 1: the indels we just stepped over // get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base // are associated with the *previous* reference base
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1); GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
boolean hasBeenSampled = false; boolean hasBeenSampled = false;
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample); Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample)); List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample); hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
@ -382,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength()); maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
} }
else nInsertions++; else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(), indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
} else { } else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position // HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
@ -407,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are // we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true. // about to report) only if includeReadsWithDeletionAtLoci is true.
size++; size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(), indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
state.getReadOffset()-1,
-1) // length=-1 --> noevent
); );
} }
} }
@ -426,10 +417,10 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled); nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else { } else {
GenomeLoc location = getLocation(); GenomeLoc location = getLocation();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>(); Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
boolean hasBeenSampled = false; boolean hasBeenSampled = false;
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample); Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample)); List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample); hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
@ -447,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
continue; continue;
} else { } else {
//observed_bases++; //observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset())); pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
size++; size++;
} }
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++; size++;
pile.add(new PileupElement(state.getRead(), -1)); pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
nDeletions++; nDeletions++;
} }
@ -495,7 +486,7 @@ public class LocusIteratorByState extends LocusIterator {
} }
private void updateReadStates() { private void updateReadStates() {
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> it = readStates.iterator(sample); Iterator<SAMRecordState> it = readStates.iterator(sample);
while ( it.hasNext() ) { while ( it.hasNext() ) {
SAMRecordState state = it.next(); SAMRecordState state = it.next();
@ -522,7 +513,7 @@ public class LocusIteratorByState extends LocusIterator {
private final PeekableIterator<SAMRecord> iterator; private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod; private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner; private final SamplePartitioner samplePartitioner;
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>(); private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage; private final int targetCoverage;
private int totalReadStates = 0; private int totalReadStates = 0;
@ -540,9 +531,9 @@ public class LocusIteratorByState extends LocusIterator {
} }
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>(); Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(Sample sample: samples) { for(final String sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager()); readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector()); readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
} }
samplePartitioner = new SamplePartitioner(readSelectors); samplePartitioner = new SamplePartitioner(readSelectors);
@ -554,7 +545,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample. * @param sample The sample.
* @return Iterator over the reads associated with that sample. * @return Iterator over the reads associated with that sample.
*/ */
public Iterator<SAMRecordState> iterator(final Sample sample) { public Iterator<SAMRecordState> iterator(final String sample) {
return new Iterator<SAMRecordState>() { return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator(); private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
@ -590,7 +581,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample. * @param sample The sample.
* @return Total number of reads in the given sample. * @return Total number of reads in the given sample.
*/ */
public int size(final Sample sample) { public int size(final String sample) {
return readStatesBySample.get(sample).size(); return readStatesBySample.get(sample).size();
} }
@ -600,12 +591,12 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample Sample, downsampled independently. * @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region. * @return Integer stop of the furthest undownsampled region.
*/ */
public int getDownsamplingExtent(final Sample sample) { public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent(); return readStatesBySample.get(sample).getDownsamplingExtent();
} }
public SAMRecordState getFirst() { public SAMRecordState getFirst() {
for(Sample sample: samples) { for(final String sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample); PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty()) if(!reads.isEmpty())
return reads.peek(); return reads.peek();
@ -639,8 +630,8 @@ public class LocusIteratorByState extends LocusIterator {
} }
samplePartitioner.complete(); samplePartitioner.complete();
for(Sample sample: samples) { for(final String sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId()); ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads()); Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
@ -1072,6 +1063,3 @@ class SamplePartitioner implements ReadSelector {
} }
} }

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* An iterator which does post-processing of a read, including potentially wrapping * An iterator which does post-processing of a read, including potentially wrapping
@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists. * no next exists.
*/ */
public SAMRecord next() { public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities); SAMRecord rec = wrappedIterator.next();
// if we are using default quals, check if we need them, and add if necessary.
// 1. we need if reads are lacking or have incomplete quality scores
// 2. we add if defaultBaseQualities has a positive value
if (defaultBaseQualities >= 0) {
byte reads [] = rec.getReadBases();
byte quals [] = rec.getBaseQualities();
if (quals == null || quals.length < reads.length) {
byte new_quals [] = new byte [reads.length];
for (int i=0; i<reads.length; i++)
new_quals[i] = defaultBaseQualities;
rec.setBaseQualities(new_quals);
}
}
// if we are using original quals, set them now if they are present in the record
if ( useOriginalBaseQualities ) {
byte[] originalQuals = rec.getOriginalBaseQualities();
if ( originalQuals != null )
rec.setBaseQualities(originalQuals);
}
return rec;
} }
/** /**

View File

@ -28,7 +28,6 @@ import org.apache.log4j.Level;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -101,9 +100,6 @@ public class GATKRunReport {
@Element(required = false, name = "exception") @Element(required = false, name = "exception")
private final ExceptionToXML mException; private final ExceptionToXML mException;
@Element(required = false, name = "argument_collection")
private final GATKArgumentCollection mCollection;
@Element(required = true, name = "working_directory") @Element(required = true, name = "working_directory")
private String currentPath; private String currentPath;
@ -187,7 +183,6 @@ public class GATKRunReport {
cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker); cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker);
} catch (Exception ignore) { } } catch (Exception ignore) { }
this.mCollection = engine.getArguments();
walkerName = engine.getWalkerName(walker.getClass()); walkerName = engine.getWalkerName(walker.getClass());
svnVersion = CommandLineGATK.getVersionNumber(); svnVersion = CommandLineGATK.getVersionNumber();
@ -293,15 +288,16 @@ public class GATKRunReport {
     * That is, postReport() is guaranteed not to fail for any reason.      * That is, postReport() is guaranteed not to fail for any reason.
*/ */
private File postReportToLocalDisk(File rootDir) { private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
try { try {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
postReportToFile(file); postReportToFile(file);
logger.debug("Wrote report to " + file); logger.debug("Wrote report to " + file);
return file; return file;
} catch ( Exception e ) { } catch ( Exception e ) {
// we catch everything, and no matter what eat the error // we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
file.delete();
return null; return null;
} }
} }
@ -312,6 +308,7 @@ public class GATKRunReport {
File localFile = postReportToLocalDisk(new File("./")); File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file if ( localFile != null ) { // we succeeded in creating the local file
localFile.deleteOnExit();
try { try {
// stop us from printing the annoying, and meaningless, mime types warning // stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
@ -336,14 +333,13 @@ public class GATKRunReport {
//logger.info("Uploading " + localFile + " to AWS bucket"); //logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object); logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) { } catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e); exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) { } catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e); exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) { } catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
} }
} }
} }

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.indexer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
/**
 * A utility class that can create a Tribble index for a reference meta data (RMD) file,
 * written to a target location. This is useful when you're unable to write to the directory
 * in which an index is located, or if you'd like to pre-index files to save time.
 */
public class RMDIndexer extends CommandLineProgram {
    @Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
    File inputFileSource = null;

    @Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
    String inputFileType = null;

    @Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
    public File referenceFile = null;

    @Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
    public File indexFile = null;

    @Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
    IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;

    private static Logger logger = Logger.getLogger(RMDIndexer.class);

    // reference sequence and the location parser built from it; set up in execute()
    private IndexedFastaSequenceFile ref = null;
    private GenomeLocParser genomeLocParser = null;

    /**
     * Validates the command-line arguments, builds the index in memory, embeds the
     * reference sequence dictionary, and writes the index to {@code indexFile}.
     *
     * @return 0 on success
     * @throws IllegalArgumentException if the arguments are unusable or the index already exists
     * @throws Exception if index creation or writing fails
     */
    @Override
    protected int execute() throws Exception {
        // check parameters
        // ---------------------------------------------------------------------------------
        // check the input parameters
        if (referenceFile != null && !referenceFile.canRead())
            throw new IllegalArgumentException("We can't read the reference file: "
                    + referenceFile + ", check that it exists, and that you have permissions to read it");

        // set the index file to the default name (<inputFile> + standard extension) if they didn't specify a file
        if (indexFile == null && inputFileSource != null)
            indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);

        // we refuse to clobber an existing index; a null indexFile here means we couldn't
        // even derive a location (shouldn't happen since inputFile is a required argument)
        if (indexFile == null)
            throw new IllegalArgumentException("No index file location was supplied, and none could be derived from the input file");
        if (indexFile.exists())
            throw new IllegalArgumentException("We can't write to the index file location: "
                    + indexFile + ", the index exists");

        logger.info(String.format("attempting to index file: %s", inputFileSource));
        logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
        logger.info(String.format("using type: %s", inputFileType));
        logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));

        // try to index the file
        // ---------------------------------------------------------------------------------
        // setup the reference
        ref = new CachingIndexedFastaSequenceFile(referenceFile);
        genomeLocParser = new GenomeLocParser(ref);

        // get a track builder
        RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);

        // find the types available to the track builders
        FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);

        // check that the type is valid
        if (descriptor == null)
            throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());

        // create the codec
        FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);

        // reference-dependent codecs need the location parser before they can decode records
        if (codec instanceof ReferenceDependentFeatureCodec)
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);

        // get some timing info
        long currentTime = System.currentTimeMillis();
        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);

        // add writing of the sequence dictionary, if supplied
        builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);

        // create the output stream and write the index; close in finally so the file
        // handle is not leaked if the write fails part-way through
        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
        try {
            index.write(stream);
        } finally {
            stream.close();
        }

        // report and exit
        logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
        return 0; // return successfully
    }

    /**
     * Command-line entry point: runs the indexer and exits with its result code.
     *
     * @param argv the arguments from the command line
     */
    public static void main(String[] argv) {
        try {
            RMDIndexer instance = new RMDIndexer();
            start(instance, argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Utilities for working with sequence dictionaries embedded in Tribble indices.
 *
 * Dictionary entries are stored in the index property list under keys prefixed
 * with {@link #SequenceDictionaryPropertyPredicate}, mapping contig name to length.
 */
public class IndexDictionaryUtils {
    private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);

    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";

    /**
     * Get the sequence dictionary stored in the index's property list, if available.
     *
     * @param index the index file to use
     * @return a SAMSequenceDictionary built from every "DICT:"-prefixed property;
     *         empty (never null) when the index carries no dictionary properties
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
            if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
                // the contig name follows the "DICT:" prefix; the property value holds the contig length
                dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
                        Integer.valueOf(entry.getValue())));
        }
        return dict;
    }

    /**
     * Create the sequence dictionary from the contig list that is always in the index;
     * a backup approach when no dictionary properties are present. Contig lengths are
     * unknown here, so each sequence is recorded with length 0.
     *
     * @param index the index file to use
     * @param dict the sequence dictionary to add contigs to
     * @return the filled-in sequence dictionary
     */
    static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        LinkedHashSet<String> seqNames = index.getSequenceNames();
        if (seqNames == null) {
            return dict;
        }
        for (String name : seqNames) {
            SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
            dict.addSequence(seq);
        }
        return dict;
    }

    /**
     * Record the given sequence dictionary in the index's property list, one
     * "DICT:&lt;contig&gt;" -&gt; length entry per sequence.
     *
     * @param index the index to annotate
     * @param dict the sequence dictionary to record
     */
    public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
        for ( SAMSequenceRecord seq : dict.getSequences() ) {
            final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
            final String length = String.valueOf(seq.getSequenceLength());
            index.addProperty(contig,length);
        }
    }

    /**
     * Validate the track's sequence dictionary against the reference's, delegating to
     * {@code SequenceDictionaryUtils.validateDictionaries}. Tracks without a built-in
     * dictionary (null or empty) are skipped with an informational log message.
     *
     * @param trackName name of the track, used in log/error messages
     * @param trackDict the track's dictionary; may be null or empty
     * @param referenceDict the reference's dictionary
     * @param validationExclusionType exclusions that may relax validation
     */
    public static void validateTrackSequenceDictionary(final String trackName,
                                                       final SAMSequenceDictionary trackDict,
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
        if (trackDict == null || trackDict.size() == 0)
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
        }
    }
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource; import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/** /**
* *
* @author aaron * @author aaron
* ` * `
* Class RMDTrackBuilder * Class RMDTrackBuilder
* *
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false; public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
// private sequence dictionary we use to set our tracks with // private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null; private SAMSequenceDictionary dict = null;
@ -150,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if (descriptor == null) if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
} }
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { } catch (ReviewedStingException e) { }
sequenceDictionary = getSequenceDictionaryFromProperties(index); sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
if (sequenceDictionary.size() == 0 && dict != null) { if (sequenceDictionary.size() == 0 && dict != null) {
File indexFile = Tribble.indexFile(inputFile); File indexFile = Tribble.indexFile(inputFile);
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
sequenceDictionary = getSequenceDictionaryFromProperties(index); try { // re-write the index
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
} }
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE ) if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
// this can take a while, let them know what we're doing // this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile); logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
setIndexSequenceDictionary(inputFile, idx, dict, null, false); validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx; return idx;
} }
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes
// ---------------------------------------------------------------------------------------------------------
/**
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
* @param index the index file to use
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/** /**
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible. * set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
* (that each contig in the index is in the sequence dictionary). * (that each contig in the index is in the sequence dictionary).
* @param inputFile for proper error message formatting. * @param inputFile for proper error message formatting.
* @param dict the sequence dictionary * @param dict the sequence dictionary
* @param index the index file * @param index the index file
* @param indexFile the index file
* @param rewriteIndex should we rewrite the index when we're done?
*
*/ */
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) { public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
if (dict == null) return; if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set // check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) { final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
if (dict.getSequence(seq.getSequenceName()) == null) validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
continue;
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength())); // actually update the dictionary in the index
} IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
// re-write the index
if (rewriteIndex) try {
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
} }
public void validateTrackSequenceDictionary(final String trackName,
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) { final SAMSequenceDictionary trackDict,
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation final SAMSequenceDictionary referenceDict ) {
if (trackDict == null || trackDict.size() == 0) IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
} }
} }

View File

@ -1,57 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
*
* @author aaron
*
* Class RMDIntervalGenerator
*
* Creates an interval list, given an RMDTrack
*/
public class RMDIntervalGenerator {
    public ReferenceOrderedDataSource dataSource;

    /**
     * Wraps a ROD track so its records can be viewed as a list of intervals.
     *
     * @param dataSource the track; must not be null
     */
    public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
        if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
        this.dataSource = dataSource;
    }

    /**
     * Builds the list of genome locations covered by the underlying track,
     * in track order.  Records without a location are skipped.
     *
     * @return a list of genome locations
     */
    public List<GenomeLoc> toGenomeLocList() {
        final List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
        final Iterator<RODRecordList> records = dataSource.seek((GenomeLoc) null);
        while (records.hasNext()) {
            final GenomeLoc location = records.next().getLocation();
            if (location != null)
                locations.add(location);
        }
        return locations;
    }

    /**
     * Indexes a list of reference ordered data sources by their track name.
     *
     * @param sources the reference ordered data sources to get the names from
     * @return a map from reference meta data track name to its data source
     */
    public static Map<String, ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
        final Map<String, ReferenceOrderedDataSource> rodNames = new HashMap<String, ReferenceOrderedDataSource>();
        for (final ReferenceOrderedDataSource rod : sources)
            rodNames.put(rod.getName(), rod);
        return rodNames;
    }
}

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*; import java.util.*;
/** /**
* Tracks a linked list of GATKReportColumn in order by name. * Tracks a linked list of GATKReportColumn in order by name.
*/ */
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> { public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>(); private List<String> columnNames = new ArrayList<String>();
/** /**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key); columnNames.add(key);
return super.put(key, value); return super.put(key, value);
} }
@Override
public Iterator<GATKReportColumn> iterator() {
return new Iterator<GATKReportColumn>() {
int offset = 0;
public boolean hasNext() { return offset < columnNames.size() ; }
public GATKReportColumn next() { return getByIndex(offset++); }
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
};
}
} }

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
} }
} }
    /**
     * Checks whether a row with the given primary key exists in this table.
     *
     * @param primaryKey the primary key value to look for
     * @return true if the primary key column contains the given value
     */
    public boolean containsKey(Object primaryKey) {
        return primaryKeyColumn.contains(primaryKey);
    }
/** /**
* Set the value for a given position in the table * Set the value for a given position in the table
* *

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* Categorical sample trait for association and analysis
*
* Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having an
* other trait value (stored in an associated value in the Sample class)
*
* @author Mark DePristo
* @since Sept. 2011
*/
public enum Affection {
    /** Affection status is unknown / missing */
    UNKNOWN,
    /** Suffers from the disease (affected by the categorical trait) */
    AFFECTED,
    /** Unaffected by the disease */
    UNAFFECTED,
    /** An "other" trait: the value of the trait is stored elsewhere (as an arbitrary string in the Sample class) */
    OTHER
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* ENUM of possible human genders: male, female, or unknown
*/
public enum Gender {
    /** Sample is male */
    MALE,
    /** Sample is female */
    FEMALE,
    /** Sex of the sample is not known */
    UNKNOWN
}

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.*;
/**
* Reads PED file-formatted tabular text files
*
* See http://www.broadinstitute.org/mpg/tagger/faq.html
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
*
* The "ped" file format refers to the widely-used format for linkage pedigree data.
* Each line describes a single (diploid) individual in the following format:
*
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
*
* If your data lacks pedigree information (for example, unrelated case/control individuals),
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
* Check out the information at the PLINK website on the "ped" file format.
*
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
* Family ID
* Individual ID
* Paternal ID
* Maternal ID
* Sex (1=male; 2=female; other=unknown)
* Phenotype
*
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
* and are actually representing these values as strings instead of doubles
*
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394
*
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
* and an error message will arise if an analysis that uses family information is requested and an
* individual of 'unknown' sex is specified as a father or mother.
*
*
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.
*
* Affection status, by default, should be coded:
* -9 missing
* 0 missing
* 1 unaffected
* 2 affected
*
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
* plink --file mydata --1 which will specify a disease phenotype coded:
*
* -9 missing
* 0 unaffected
* 1 affected
*
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
*
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
*
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
*
* FAM001 1 0 0 1 2 A A G G A C
* FAM001 2 0 0 1 2 A A A G 0 0
* ...
*
* Note that the GATK does not support genotypes in a PED file.
*
* @author Mark DePristo
* @since 2011
*/
public class PedReader {
    private static Logger logger = Logger.getLogger(PedReader.class);

    /** Phenotype values interpreted as a categorical (affection) trait; any other value marks the trait as quantitative. */
    final static private Set<String> CATEGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
    final static private String commentMarker = "#";

    /**
     * An enum that specifies which, if any, of the standard PED fields are
     * missing from the input records.  For example, suppose we have the full record:
     *
     * "fam1 kid dad mom 1 2"
     *
     * indicating a male affected child.  This can be parsed with the -ped x.ped argument
     * to the GATK.  Suppose we only have:
     *
     * "fam1 kid 1"
     *
     * we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
     */
    public enum MissingPedField {
        /**
         * The PED records do not have the first (FAMILY_ID) argument.  The family id
         * will be set to null / empty.
         */
        NO_FAMILY_ID,

        /**
         * The PED records do not have either the paternal or maternal IDs, so
         * the corresponding IDs are set to null.
         */
        NO_PARENTS,

        /**
         * The PED records do not have the GENDER field, so the sex of each
         * sample will be set to UNKNOWN.
         */
        NO_SEX,

        /**
         * The PED records do not have the PHENOTYPE field, so the phenotype
         * of each sample will be set to UNKNOWN.
         */
        NO_PHENOTYPE
    }

    /** The six standard PED columns, in file order. */
    protected enum Field {
        FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
    }

    // phenotype codes
    private final static String MISSING_VALUE1 = "-9";
    private final static String MISSING_VALUE2 = "0";
    private final static String PHENOTYPE_UNAFFECTED = "1";
    private final static String PHENOTYPE_AFFECTED = "2";

    // sex codes; any other value means unknown
    private final static String SEX_MALE = "1";
    private final static String SEX_FEMALE = "2";

    public PedReader() { }

    /**
     * Parses a PED file, adding each parsed record to sampleDB.
     *
     * @param source        the PED file to read
     * @param missingFields which standard PED fields are absent from the records
     * @param sampleDB      the database to add parsed samples to
     * @return the list of samples parsed (including implicitly created parents)
     * @throws FileNotFoundException if source cannot be opened
     */
    public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
        logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
        return parse(new FileReader(source), missingFields, sampleDB);
    }

    /**
     * Parses an inline PED string; ';' separates records (each becomes one line).
     */
    public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
        return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
    }

    /**
     * Parses PED records from reader, adding each parsed sample to sampleDB.
     * Two passes: the first splits the lines and decides whether the phenotype
     * column is quantitative; the second builds the Sample objects.
     */
    public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        final List<String> lines = new XReadLines(reader).readLines();

        // Compute the column offset of each field; -1 marks a field declared missing
        final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
        final int samplePos = familyPos + 1;
        final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
        final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
        final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
        final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
        final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;

        // first pass: split the records and determine whether the phenotype is quantitative
        int lineNo = 1;
        boolean isQT = false;
        final List<String[]> splits = new ArrayList<String[]>(lines.size());
        for ( final String line : lines ) {
            if ( line.startsWith(commentMarker)) continue;
            if ( line.trim().equals("") ) continue;

            final String[] parts = line.split("\\s+");

            if ( parts.length != nExpectedFields )
                throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

            if ( phenotypePos != -1 ) {
                // any value outside the categorical codes makes the whole file quantitative
                isQT = isQT || ! CATEGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
            }
            splits.add(parts);
            lineNo++;
        }
        logger.info("Phenotype is other? " + isQT);

        // second pass: parse each record into a Sample
        lineNo = 1;
        final List<Sample> samples = new ArrayList<Sample>(splits.size());
        for ( final String[] parts : splits ) {
            String familyID = null, individualID, paternalID = null, maternalID = null;
            Gender sex = Gender.UNKNOWN;
            String quantitativePhenotype = Sample.UNSET_QT;
            Affection affection = Affection.UNKNOWN;

            if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
            individualID = parts[samplePos];
            if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
            if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);

            if ( sexPos != -1 ) {
                if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
                else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
                else sex = Gender.UNKNOWN;
            }

            if ( phenotypePos != -1 ) {
                if ( isQT ) {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) )
                        affection = Affection.UNKNOWN;
                    else {
                        affection = Affection.OTHER;
                        quantitativePhenotype = parts[phenotypePos];
                    }
                } else {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
                    else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
                }
            }

            final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
            samples.add(s);
            sampleDB.addSample(s);
            lineNo++;
        }

        // create implicit parent samples that are referenced by children but have no record of their own
        for ( final Sample sample : new ArrayList<Sample>(samples) ) {
            Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
            if ( dad != null ) samples.add(dad);

            Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
            if ( mom != null ) samples.add(mom);
        }

        return samples;
    }

    /**
     * @return null if the string is one of the PED missing-value codes ("-9" or "0"), otherwise the string itself
     */
    private final static String maybeMissing(final String string) {
        if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
            return null;
        else
            return string;
    }

    /**
     * Adds a placeholder sample for a parent that is referenced but has no record of its own.
     *
     * @return the newly created sample, or null if id is null or already present in sampleDB
     */
    private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
        if ( id != null && sampleDB.getSample(id) == null ) {
            Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
            sampleDB.addSample(s);
            return s;
        } else
            return null;
    }

    /**
     * Parses a list of tags from the command line, assuming it comes from the GATK Engine
     * tags, and returns the corresponding EnumSet.
     *
     * @param arg the actual engine arg, used for the UserException if there's an error
     * @param tags a list of string tags that should be converted to the MissingPedField value
     * @return the set of missing fields named by tags
     */
    public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
        final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);

        for ( final String tag : tags ) {
            try {
                missingFields.add(MissingPedField.valueOf(tag));
            } catch ( IllegalArgumentException e ) {
                // Arrays.toString is required here: concatenating values() directly would
                // print the array reference (e.g. "[LMissingPedField;@1a2b3c"), not the names
                throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + Arrays.toString(MissingPedField.values()));
            }
        }

        return missingFields;
    }
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
*
*/
public enum PedigreeValidationType {
    /**
     * Require that, if a pedigree file is provided, all samples in the VCF or BAM files
     * have a corresponding entry in the pedigree file(s).
     */
    STRICT,

    /**
     * Do not enforce any overlap between the VCF/BAM samples and the pedigree data
     * */
    SILENT
}

View File

@ -0,0 +1,222 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
/**
*
*/
/**
 * An immutable record of a single sample: its ID, optional pedigree links
 * (family / father / mother), gender, and phenotype (categorical affection
 * status or an arbitrary "other" trait value).
 */
public class Sample implements Comparable<Sample> {
    final private String familyID, paternalID, maternalID;
    final private Gender gender;
    final private String otherPhenotype;
    final private Affection affection;
    final private String ID;
    final private SampleDB infoDB;
    // reserved for non-standard per-sample properties; currently never populated
    final private Map<String, Object> properties = new HashMap<String, Object>();

    /** Sentinel meaning "no quantitative trait value set" */
    public final static String UNSET_QT = null;

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID,
                  final Gender gender, final Affection affection, final String otherPhenotype) {
        this.familyID = familyID;
        this.paternalID = paternalID;
        this.maternalID = maternalID;
        this.gender = gender;
        this.otherPhenotype = otherPhenotype;
        this.affection = affection;
        this.ID = ID;
        this.infoDB = infoDB;
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection, final String otherPhenotype) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID, final Gender gender) {
        this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
        this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
    }

    public Sample(String id, SampleDB infoDB) {
        this(id, infoDB, null, null, null,
                Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
    }

    // -------------------------------------------------------------------------------------
    //
    // standard property getters
    //
    // -------------------------------------------------------------------------------------

    public String getID() {
        return ID;
    }

    public String getFamilyID() {
        return familyID;
    }

    public String getPaternalID() {
        return paternalID;
    }

    public String getMaternalID() {
        return maternalID;
    }

    public Affection getAffection() {
        return affection;
    }

    /** @return true if this sample carries an "other" trait value (see getOtherPhenotype) */
    public boolean hasOtherPhenotype() {
        return affection == Affection.OTHER;
    }

    public String getOtherPhenotype() {
        return otherPhenotype;
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return infoDB.getSample(maternalID);
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return infoDB.getSample(paternalID);
    }

    /**
     * Get gender of the sample
     * @return the gender of this sample (MALE, FEMALE, or UNKNOWN)
     */
    public Gender getGender() {
        return gender;
    }

    @Override
    public int compareTo(final Sample sample) {
        return ID.compareTo(sample.getID());
    }

    @Override
    public String toString() {
        return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
                getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
                getOtherPhenotype(), properties);
    }

    @Override
    public int hashCode() {
        // consistent with equals: equal samples share the same ID
        return ID.hashCode();
    }

    @Override
    public boolean equals(final Object o) {
        if(o == null)
            return false;
        if(o instanceof Sample) {
            Sample otherSample = (Sample)o;
            return ID.equals(otherSample.ID) &&
                    equalOrNull(familyID, otherSample.familyID) &&
                    equalOrNull(paternalID, otherSample.paternalID) &&
                    equalOrNull(maternalID, otherSample.maternalID) &&
                    equalOrNull(gender, otherSample.gender) &&
                    equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
                    equalOrNull(affection, otherSample.affection) &&
                    equalOrNull(properties, otherSample.properties);
        }
        return false;
    }

    /** Null-safe equality: two nulls are equal, a null never equals a non-null. */
    private final static boolean equalOrNull(final Object o1, final Object o2) {
        if ( o1 == null )
            return o2 == null;
        else
            return o2 == null ? false : o1.equals(o2);
    }

    /**
     * Merges a single field from two sample records.  A null / empty value carries no
     * information and yields to the other side; two real values must agree.
     *
     * @param name       sample ID, for error reporting
     * @param field      field name, for error reporting
     * @param o1         value from the first record (may be null)
     * @param o2         value from the second record (may be null)
     * @param emptyValue the field's "no information" sentinel (e.g. Gender.UNKNOWN)
     * @return the merged value
     * @throws UserException if both records carry real, conflicting values
     */
    private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
        if ( o1 == null || o1.equals(emptyValue) ) {
            // o1 carries no information, so o2 (possibly also null) wins
            return o2;
        } else if ( o2 == null || o2.equals(emptyValue) ) {
            return o1; // keep o1, since it's a real value
        } else {
            // both o1 and o2 have real values; they must be equal by value
            // (reference equality here would falsely reject equal but distinct Strings)
            if ( o1.equals(o2) )
                return o1;
            else
                throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
        }
    }

    /**
     * Merges two records describing the same sample, field by field.
     *
     * @return next if the two records are identical, otherwise a new merged Sample
     * @throws UserException if any field holds conflicting real values
     */
    public final static Sample mergeSamples(final Sample prev, final Sample next) {
        if ( prev.equals(next) )
            return next;
        else {
            return new Sample(prev.getID(), prev.infoDB,
                    mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
                    mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
                    mergeValues(prev.getID(), "Maternal_ID", prev.getMaternalID(), next.getMaternalID(), null),
                    mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
                    mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
                    mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
        }
    }
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
/**
*
*/
/**
 * A database of Sample objects keyed by their unique sample ID, with helpers for
 * looking samples up from reads / read groups / genotypes and for navigating
 * pedigree structure (families, children).
 */
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    public SampleDB() {
    }

    /**
     * Protected function to add a single sample to the database.  If a sample with
     * the same ID is already present, the two records are merged (see Sample.mergeSamples).
     *
     * @param sample to be added
     * @return this database, for chaining
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID
     * @param id the sample ID
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     * Look up the sample for a read, via its read group's sample name.
     * @param read the read whose sample to find
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMRecord read) {
        return getSample(read.getReadGroup());
    }

    /**
     * Look up the sample for a read group.
     * @param rg the read group whose sample to find
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * Look up the sample for a genotype, via its sample name.
     * @param g Genotype
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /** @return a fresh set containing all samples in the database */
    public Set<Sample> getSamples() {
        return new HashSet<Sample>(samples.values());
    }

    /** @return an unmodifiable view of all sample IDs */
    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names
     * there will not be any duplicates in the returned set.
     *
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        final HashSet<Sample> samples = new HashSet<Sample>();
        for ( final String name : sampleNameList ) {
            // getSample is a plain map lookup and cannot throw; an unknown name
            // yields null, which (as before) ends up in the returned set
            samples.add(getSample(name));
        }
        return samples;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids)
     * @return the sorted set of family IDs
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids.  Note: the map is rebuilt on every call.
     *
     * @return family ID to family members map, sorted by family ID
     */
    public final Map<String, Set<Sample>> getFamilies() {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();

        for ( final Sample sample : samples.values() ) {
            final String famID = sample.getFamilyID();
            if ( famID != null ) {
                if ( ! families.containsKey(famID) )
                    families.put(famID, new TreeSet<Sample>());
                families.get(famID).add(sample);
            }
        }

        return families;
    }

    /**
     * Return all samples with a given family ID
     * @param familyId the family to look up; may be null
     * @return the members of the family, or null if familyId is null or unknown
     */
    public Set<Sample> getFamily(String familyId) {
        // guard: TreeMap.get(null) would throw NPE, so treat a null id as "no family"
        return familyId == null ? null : getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample.
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     *
     * @param sample the putative parent
     * @return the (possibly empty) set of samples whose mother or father is this sample
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        final Set<Sample> family = getFamily(sample.getFamilyID());
        if ( family == null )
            return children; // no family information -> no identifiable children
        for ( final Sample familyMember : family ) {
            if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
                children.add(familyMember);
            }
        }
        return children;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
*
*/
/**
 * Builder that assembles a {@link SampleDB} from SAM headers, explicit sample names,
 * and pedigree (PED) files/strings, then validates the result according to the
 * configured {@link PedigreeValidationType}.
 */
public class SampleDBBuilder {
    PedigreeValidationType validationStrictness;
    final SampleDB sampleDB = new SampleDB();
    final GenomeAnalysisEngine engine;

    // Samples seen via data sources (SAM headers / name lists) vs. via pedigree
    // inputs are tracked separately so validate() can cross-check the two sets.
    Set<Sample> samplesFromDataSources = new HashSet<Sample>();
    Set<Sample> samplesFromPedigrees = new HashSet<Sample>();

    /** for testing only */
    protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
        engine = null;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     *
     * @param engine engine used to resolve positional tags on pedigree arguments
     * @param validationStrictness how strictly to validate pedigree/data-source consistency
     */
    public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
        this.engine = engine;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them.
     *
     * @param header SAM header whose read groups supply the sample names
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
        addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
        return this;
    }

    /**
     * Creates and registers a fresh Sample for every name not already in the DB.
     *
     * @param sampleNames sample names to add
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
        for (final String sampleName : sampleNames) {
            if (sampleDB.getSample(sampleName) == null) {
                final Sample newSample = new Sample(sampleName, sampleDB);
                sampleDB.addSample(newSample);
                samplesFromDataSources.add(newSample); // keep track of data source samples
            }
        }
        return this;
    }

    /**
     * Parses each pedigree file and merges its samples into the DB.
     *
     * @param pedigreeFiles PED files to parse
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
        for (final File pedFile : pedigreeFiles) {
            Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
            samplesFromPedigrees.addAll(samples);
        }
        return this;
    }

    /**
     * Parses each in-line pedigree string and merges its samples into the DB.
     *
     * @param pedigreeStrings PED-formatted strings to parse
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
        for (final String pedString : pedigreeStrings) {
            Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
            samplesFromPedigrees.addAll(samples);
        }
        return this;
    }

    /**
     * Parse one sample file and integrate it with samples that are already there.
     * Fail quickly if we find any errors in the file.
     *
     * @param sampleFile PED file to parse
     * @return samples parsed from the file
     */
    private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
        final PedReader reader = new PedReader();

        try {
            return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
        } catch ( FileNotFoundException e ) {
            throw new UserException.CouldNotReadInputFile(sampleFile, e);
        }
    }

    /**
     * Parse one in-line pedigree string and integrate it with samples that are already there.
     *
     * @param string PED-formatted string
     * @return samples parsed from the string
     */
    private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
        final PedReader reader = new PedReader();
        return reader.parse(string, getMissingFields(string), sampleDB);
    }

    /**
     * Validates the accumulated samples and returns the finished DB.
     *
     * @return the validated sample DB
     */
    public SampleDB getFinalSampleDB() {
        validate();
        return sampleDB;
    }

    /**
     * Determines which PED fields are declared missing for a given engine argument
     * (via its positional tags). Returns the empty set when no engine is available
     * (testing constructor).
     *
     * @param engineArg the pedigree file or string argument whose tags to inspect
     * @return set of PED fields flagged as missing for this argument
     */
    public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
        if ( engine == null ) // testing only: no engine to supply tags
            return EnumSet.noneOf(PedReader.MissingPedField.class);
        else {
            final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
            return PedReader.parseMissingFieldTags(engineArg, posTags);
        }
    }

    // --------------------------------------------------------------------------------
    //
    // Validation
    //
    // --------------------------------------------------------------------------------

    /**
     * Cross-checks data-source samples against pedigree samples.
     * No-op under SILENT validation; otherwise every sample seen in a data source
     * must also appear in the pedigree inputs (when any pedigree input was given).
     *
     * @throws UserException if a data-source sample is absent from the pedigrees
     */
    protected final void validate() {
        if ( validationStrictness == PedigreeValidationType.SILENT )
            return;

        // check that samples in data sources are all annotated, if anything is annotated
        if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
            final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
            for ( final Sample pSample : samplesFromPedigrees )
                sampleNamesFromPedigrees.add(pSample.getID());

            for ( final Sample dsSample : samplesFromDataSources )
                if ( ! sampleNamesFromPedigrees.contains(dsSample.getID()) )
                    throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
        }
    }
}

View File

@ -364,8 +364,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
// count up the number of skipped reads by summing over all filters // count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L; long nSkippedReads = 0L;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet()) for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
nSkippedReads += countsByFilter.getValue(); nSkippedReads += countsByFilter;
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600)); logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600));
if ( cumulativeMetrics.getNumReadsSeen() > 0 ) if ( cumulativeMetrics.getNumReadsSeen() > 0 )
@ -373,10 +373,10 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
nSkippedReads, nSkippedReads,
cumulativeMetrics.getNumReadsSeen(), cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen()))); 100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen())));
for ( Map.Entry<Class, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) { for ( Map.Entry<String, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
long count = filterCounts.getValue(); long count = filterCounts.getValue();
logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s", logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s",
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey()))); count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), filterCounts.getKey()));
} }
if ( performanceLog != null ) performanceLog.close(); if ( performanceLog != null ) performanceLog.close();

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*; import java.util.*;
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
return "dups"; return "dups";
} }
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) { private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>(); ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
l.add(read); l.add(read);
for (SAMRecord read2 : iter) { for (SAMRecord read2 : iter) {
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
iter.pushback(read2); iter.pushback(read2);
break; break;
} else { } else {
l.add(read2); l.add((GATKSAMRecord) read2);
} }
} }
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param reads the list of reads to split into unique molecular samples * @param reads the list of reads to split into unique molecular samples
* @return * @return
*/ */
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) { protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>(); Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
// for each read, find duplicates, and either add the read to its duplicate list or start a new one // for each read, find duplicates, and either add the read to its duplicate list or start a new one
for ( SAMRecord read : reads ) { for ( GATKSAMRecord read : reads ) {
List<SAMRecord> readSet = findDuplicateReads(read, readSets); List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
if ( readSet == null ) { if ( readSet == null ) {
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
} else { } else {
readSet.add(read); readSet.add(read);
} }
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param readSets * @param readSets
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind * @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
*/ */
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) { protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
if ( read.getReadPairedFlag() ) { if ( read.getReadPairedFlag() ) {
// paired // paired
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
for (List<SAMRecord> reads : readSets) { for (List<GATKSAMRecord> reads : readSets) {
SAMRecord key = reads.get(0); GATKSAMRecord key = reads.get(0);
// read and key start at the same place, and either the this read and the key // read and key start at the same place, and either the this read and the key
// share a mate location or the read is flagged as a duplicate // share a mate location or the read is flagged as a duplicate
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
} }
} }
} else { } else {
for (List<SAMRecord> reads : readSets) { for (List<GATKSAMRecord> reads : readSets) {
SAMRecord key = reads.get(0); GATKSAMRecord key = reads.get(0);
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength(); boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n", //System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(), // read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
// get the genome loc from the read // get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter)); Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location // Jump forward in the reference to this locus location

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// if the read is mapped, create a metadata tracker // if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null; ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, read); final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
if (keepMeP) { if (keepMeP) {
M x = walker.map(refContext, read, tracker); // the tracker can be null M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
sum = walker.reduce(x, sum); sum = walker.reduce(x, sum);
} }

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.clipreads.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File; import java.io.File;
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
/** /**
* The reads map function. * The reads map function.
* *
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a GATKSAMRecord
* @return the ReadClipper object describing what should be done to clip this read * @return the ReadClipper object describing what should be done to clip this read
*/ */
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) { if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) { if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read); read = ReadUtils.replaceSoftClipsWithMatches(read);
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/ */
private void clipSequences(ReadClipperWithData clipper) { private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) { for (SeqToClip stc : sequencesToClip) {
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param stop * @param stop
* @return * @return
*/ */
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) { private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
if (read.getReadNegativeStrandFlag()) if (read.getReadNegativeStrandFlag())
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1); return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
else else
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/ */
private void clipCycles(ReadClipperWithData clipper) { private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) { if (cyclesToClip != null) {
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param clipper * @param clipper
*/ */
private void clipBadQualityScores(ReadClipperWithData clipper) { private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
int readLen = read.getReadBases().length; int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
if ( clipper == null ) if ( clipper == null )
return data; return data;
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation); GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
if (outputBam != null) { if (outputBam != null) {
outputBam.addAlignment(clippedRead); outputBam.addAlignment(clippedRead);
} else { } else {
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
public class ReadClipperWithData extends ReadClipper { public class ReadClipperWithData extends ReadClipper {
private ClippingData data; private ClippingData data;
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) { public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
super(read); super(read);
data = new ClippingData(clipSeqs); data = new ClippingData(clipSeqs);
} }

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -20,11 +20,11 @@ import java.util.Set;
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) { public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
return true; // We are keeping all the reads return true; // We are keeping all the reads
} }
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ); public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
// Given result of map function // Given result of map function
public abstract ReduceType reduceInit(); public abstract ReduceType reduceInit();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File; import java.io.File;
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
/** /**
* The reads filter function. * The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return namesToKeep.contains(read.getReadName()); return namesToKeep.contains(read.getReadName());
} }
/** /**
* The reads map function. * The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; return read;
} }

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream; import java.io.PrintStream;
import java.text.DecimalFormat; import java.text.DecimalFormat;
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
private FlagStat myStat = new FlagStat(); private FlagStat myStat = new FlagStat();
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++; myStat.readCount++;
if (read.getReadFailsVendorQualityCheckFlag()) { if (read.getReadFailsVendorQualityCheckFlag()) {
myStat.QC_failure++; myStat.QC_failure++;

View File

@ -17,7 +17,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
*/ */
@By(DataSource.READS) @By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL) @PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?

View File

@ -34,6 +34,12 @@ public enum PartitionType {
*/ */
NONE, NONE,
/**
* The walker inputs can be chunked down to individual
* reads.
*/
READ,
/** /**
* The walker inputs can be chunked down to the * The walker inputs can be chunked down to the
* per-locus level. * per-locus level.

View File

@ -40,6 +40,7 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** /**
* The reads filter function. * The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// check the read group // check the read group
if ( readGroup != null ) { if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup(); SAMReadGroupRecord myReadGroup = read.getReadGroup();
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** /**
* The reads map function. * The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; return read;
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.CONTIG) @PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; } public boolean requiresOrderedReads() { return false; }
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
/** Must return true for reads that need to be processed. Reads, for which this method return false will /** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker. * be skipped by the engine and never passed to the walker.
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// We are keeping all the reads // We are keeping all the reads
return true; return true;
} }
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker); public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
} }

View File

@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList; import java.util.ArrayList;
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
logger.info("SplitSamFile version: " + VERSION); logger.info("SplitSamFile version: " + VERSION);
} }
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read; return read;
} }

View File

@ -25,15 +25,17 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List; import java.util.List;
@ -77,6 +79,23 @@ public abstract class Walker<MapType, ReduceType> {
return toolkit; return toolkit;
} }
/**
* Gets the master sequence dictionary for this walker
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
*/
protected SAMSequenceDictionary getMasterSequenceDictionary() {
return getToolkit().getMasterSequenceDictionary();
}
protected SampleDB getSampleDB() {
return getToolkit().getSampleDB();
}
protected Sample getSample(final String id) {
return getToolkit().getSampleDB().getSample(id);
}
/** /**
* (conceptual static) method that states whether you want to see reads piling up at a locus * (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus. * that contain a deletion at the locus.

View File

@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
continue; continue;
} }
// todo -- actually care about indel length from the pileup (agnostic at the moment) // todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size(); int refCount = indelPileup.getNumberOfElements();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) { if ( refCount + altCount == 0 ) {

View File

@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
if (!g.isHet()) if (!g.isHet())
return null; return null;
Set<Allele> altAlleles = vc.getAlternateAlleles(); Collection<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 ) if ( altAlleles.size() == 0 )
return null; return null;

Some files were not shown because too many files have changed in this diff Show More