Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/stable
This commit is contained in:
commit
6a5d5e7ac9
|
|
@ -0,0 +1,20 @@
|
|||
/*.bam
|
||||
/*.bai
|
||||
/*.bed
|
||||
*.idx
|
||||
*~
|
||||
/*.vcf
|
||||
/*.txt
|
||||
/*.csh
|
||||
/.*
|
||||
/*.pdf
|
||||
/*.eval
|
||||
*.ipr
|
||||
*.iws
|
||||
*.iml
|
||||
.DS_Store
|
||||
queueScatterGather
|
||||
/foo*
|
||||
/bar*
|
||||
integrationtests/
|
||||
public/testdata/onTheFlyOutputTest.vcf
|
||||
156
build.xml
156
build.xml
|
|
@ -28,6 +28,8 @@
|
|||
|
||||
<property name="build.dir" value="build" />
|
||||
<property name="dist.dir" value="dist" />
|
||||
<property name="contract.dump.dir" value="dump" />
|
||||
<property name="pipelinetest.dir" value="pipelinetests" />
|
||||
<property name="lib.dir" value="lib" />
|
||||
<property name="external.dir" value="external" />
|
||||
<property name="public.dir" value="public" />
|
||||
|
|
@ -35,18 +37,26 @@
|
|||
<property name="java.public.source.dir" value="${public.dir}/java/src" />
|
||||
<property name="java.private.source.dir" value="${private.dir}/java/src" />
|
||||
<property name="java.classes" value="${build.dir}/java/classes" />
|
||||
<property name="R.public.scripts.dir" value="${public.dir}/R/scripts" />
|
||||
<property name="R.private.scripts.dir" value="${private.dir}/R/scripts" />
|
||||
<property name="R.public.src.dir" value="${public.dir}/R/src" />
|
||||
<!-- Legacy: Installing libraries back into the source directory
|
||||
instead of the build or dist directory... intentionally avoids ant clean?? -->
|
||||
<property name="R.library.dir" value="${public.dir}/R" />
|
||||
<property name="R.tar.dir" value="${build.dir}/R/src" />
|
||||
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
|
||||
<property name="resource.file" value="StingText.properties" />
|
||||
<property name="resource.path" value="${java.classes}/StingText.properties" />
|
||||
|
||||
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
|
||||
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
|
||||
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
|
||||
<property name="scala.classes" value="${build.dir}/scala/classes" />
|
||||
|
||||
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
|
||||
|
||||
<property name="javadoc.dir" value="javadoc" />
|
||||
<property name="scaladoc.dir" value="scaladoc" />
|
||||
|
||||
|
||||
<!-- Contracts for Java -->
|
||||
<!-- By default, enabled only for test targets -->
|
||||
<!-- To disable for test targets, run with -Duse.contracts=false -->
|
||||
|
|
@ -60,7 +70,7 @@
|
|||
|
||||
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
|
||||
<property name="halt" value="yes" />
|
||||
|
||||
|
||||
<!-- should our unit test output go to a file or the screen?
|
||||
false means it goes to the screen (default) true to file -->
|
||||
<property name="usefile" value="false" />
|
||||
|
|
@ -82,7 +92,7 @@
|
|||
<patternset refid="java.source.pattern" />
|
||||
</fileset>
|
||||
|
||||
<!-- terrible hack to get gatkdocs to see all files -->
|
||||
<!-- terrible hack to get gatkdocs to see all files -->
|
||||
<patternset id="all.java.source.pattern">
|
||||
<include name="${java.public.source.dir}/**/*.java" />
|
||||
<include name="${java.private.source.dir}/**/*.java" />
|
||||
|
|
@ -113,7 +123,7 @@
|
|||
<exclude name="testng*.jar" />
|
||||
<exclude name="bcel*.jar" />
|
||||
</patternset>
|
||||
|
||||
|
||||
<path id="external.dependencies">
|
||||
<fileset dir="${lib.dir}">
|
||||
<patternset refid="dependency.mask" />
|
||||
|
|
@ -154,16 +164,18 @@
|
|||
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
|
||||
<property name="ivy.settings.dir" value="settings"/>
|
||||
<property file="${ivy.settings.dir}/ivysettings.properties"/>
|
||||
|
||||
|
||||
<mkdir dir="${lib.dir}"/>
|
||||
<mkdir dir="${ivy.jar.dir}"/>
|
||||
|
||||
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
|
||||
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
|
||||
dest="${ivy.jar.dir}/${ivy.jar.file}"
|
||||
usetimestamp="true"/>
|
||||
<taskdef resource="org/apache/ivy/ant/antlib.xml"
|
||||
uri="antlib:org.apache.ivy.ant"
|
||||
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
|
||||
|
||||
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
|
||||
<property name="init.resolve.done" value="true"/>
|
||||
</target>
|
||||
|
|
@ -209,11 +221,11 @@
|
|||
<equals arg1="${git.describe.exit.value}" arg2="0" />
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
|
||||
<target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded">
|
||||
<property name="build.version" value="${git.describe.output}" />
|
||||
</target>
|
||||
|
||||
|
||||
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
|
||||
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
|
||||
<arg line="rev-parse HEAD" />
|
||||
|
|
@ -222,11 +234,11 @@
|
|||
<equals arg1="${git.rev-parse.exit.value}" arg2="0" />
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
|
||||
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
|
||||
<property name="build.version" value="${git.rev-parse.output}" />
|
||||
</target>
|
||||
|
||||
|
||||
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
|
||||
<!-- Set build.version to exported if no other value has been set -->
|
||||
<property name="build.version" value="exported" />
|
||||
|
|
@ -264,7 +276,7 @@
|
|||
<echo message="Scala build : ${scala.target}"/>
|
||||
<echo message="source revision : ${build.version}"/>
|
||||
<echo message="build time : ${build.timestamp}" />
|
||||
|
||||
|
||||
<condition property="include.private">
|
||||
<equals arg1="${gatk.target}" arg2="private" casesensitive="false" />
|
||||
</condition>
|
||||
|
|
@ -310,13 +322,13 @@
|
|||
<target name="gatk.compile.public.source" depends="init,resolve">
|
||||
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
|
||||
<compilerarg value="-proc:none"/>
|
||||
</javac>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private">
|
||||
<javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
|
||||
<compilerarg value="-proc:none"/>
|
||||
</javac>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source">
|
||||
|
|
@ -325,11 +337,11 @@
|
|||
<property name="dist.dir" value="${external.dist.dir}" />
|
||||
<property name="gatk.classpath" value="${external.gatk.classpath}" />
|
||||
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
|
||||
</subant>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="gatk.compile.source"
|
||||
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
|
||||
<target name="gatk.compile.source"
|
||||
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
|
||||
description="compile the GATK source" />
|
||||
|
||||
<target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts">
|
||||
|
|
@ -339,9 +351,9 @@
|
|||
<pathelement path="${java.classes}" />
|
||||
</classpath>
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
|
||||
<compilerarg value="-proc:only"/>
|
||||
</javac>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="check.contracts.private" depends="gatk.contracts.public">
|
||||
|
|
@ -360,14 +372,14 @@
|
|||
<pathelement path="${java.classes}" />
|
||||
</classpath>
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
|
||||
<compilerarg value="-proc:only"/>
|
||||
</javac>
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
|
||||
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
|
||||
description="create GATK contracts" if="include.contracts" />
|
||||
|
||||
|
||||
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
|
||||
|
||||
<target name="init.queue-extensions.generate" depends="gatk.compile">
|
||||
|
|
@ -411,9 +423,9 @@
|
|||
<src path="${scala.public.source.dir}" />
|
||||
<src path="${queue-extensions.source.dir}" />
|
||||
<include name="**/*.scala"/>
|
||||
</scalac>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
|
||||
<target name="check.scala.private" depends="scala.compile.public">
|
||||
<condition property="include.scala.private">
|
||||
<and>
|
||||
|
|
@ -422,12 +434,12 @@
|
|||
</and>
|
||||
</condition>
|
||||
</target>
|
||||
|
||||
|
||||
<target name="scala.compile.private" depends="check.scala.private" if="include.scala.private">
|
||||
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
|
||||
<src path="${scala.private.source.dir}" />
|
||||
<include name="**/*.scala"/>
|
||||
</scalac>
|
||||
</scalac>
|
||||
</target>
|
||||
|
||||
<target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" />
|
||||
|
|
@ -530,6 +542,11 @@
|
|||
|
||||
<target name="sting.compile" depends="gatk.compile, scala.compile" />
|
||||
|
||||
<target name="R.public.tar">
|
||||
<mkdir dir="${R.tar.dir}/${R.package.path}" />
|
||||
<tar compression="gzip" basedir="${R.public.src.dir}/${R.package.path}" includes="gsalib/**" destfile="${R.tar.dir}/${R.package.path}/gsalib.tar.gz" />
|
||||
</target>
|
||||
|
||||
<target name="init.jar" depends="sting.compile,extracthelp">
|
||||
<mkdir dir="${dist.dir}"/>
|
||||
<copy todir="${dist.dir}">
|
||||
|
|
@ -537,7 +554,7 @@
|
|||
</copy>
|
||||
</target>
|
||||
|
||||
<target name="sting-utils.jar" depends="gatk.compile, init.jar">
|
||||
<target name="sting-utils.jar" depends="gatk.compile, init.jar, R.public.tar">
|
||||
<jar jarfile="${dist.dir}/StingUtils.jar">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="**/utils/**/*.class"/>
|
||||
|
|
@ -549,6 +566,15 @@
|
|||
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
|
||||
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
|
||||
<fileset dir="${R.tar.dir}">
|
||||
<include name="**/${R.package.path}/**/*.tar.gz"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.public.scripts.dir}">
|
||||
<include name="**/utils/**/*.R"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
|
||||
<include name="**/utils/**/*.R"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" />
|
||||
</manifest>
|
||||
|
|
@ -577,6 +603,14 @@
|
|||
<include name="**/gatk/**/*.class" />
|
||||
<include name="**/alignment/**/*.class"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.public.scripts.dir}">
|
||||
<include name="**/gatk/**/*.R"/>
|
||||
<include name="**/alignment/**/*.R"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
|
||||
<include name="**/gatk/**/*.R"/>
|
||||
<include name="**/alignment/**/*.R"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
|
||||
</manifest>
|
||||
|
|
@ -591,6 +625,14 @@
|
|||
<include name="**/analyzecovariates/**/*.class" />
|
||||
<include name="**/gatk/walkers/recalibration/*.class" />
|
||||
</fileset>
|
||||
<fileset dir="${R.public.scripts.dir}">
|
||||
<include name="**/analyzecovariates/**/*.R"/>
|
||||
<include name="**/gatk/walkers/recalibration/**/*.R"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
|
||||
<include name="**/analyzecovariates/**/*.R"/>
|
||||
<include name="**/gatk/walkers/recalibration/**/*.R"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
|
||||
</manifest>
|
||||
|
|
@ -603,28 +645,7 @@
|
|||
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
|
||||
</subant>
|
||||
</target>
|
||||
<!--
|
||||
<target name="gatk.oneoffs.jar" depends="gatk.compile, init.jar"
|
||||
description="generate the GATK oneoffs distribution" if="include.oneoffs">
|
||||
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" whenmanifestonly="skip">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="**/tools/**/*.class" />
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.oneoffprojects.tools.CompareBAMAlignments" />
|
||||
</manifest>
|
||||
</jar>
|
||||
|
||||
<jar jarfile="${dist.dir}/SliceBams.jar" whenmanifestonly="skip">
|
||||
<fileset dir="${java.classes}">
|
||||
<include name="**/tools/**/*.class" />
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.SliceBams" />
|
||||
</manifest>
|
||||
</jar>
|
||||
</target>
|
||||
-->
|
||||
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
|
||||
<jar jarfile="${dist.dir}/GATKScala.jar">
|
||||
<fileset dir="${scala.classes}">
|
||||
|
|
@ -641,6 +662,12 @@
|
|||
<fileset dir="${java.classes}">
|
||||
<include name="org/broadinstitute/sting/queue/**/*.class" />
|
||||
</fileset>
|
||||
<fileset dir="${R.public.scripts.dir}">
|
||||
<include name="org/broadinstitute/sting/queue/**/*.R"/>
|
||||
</fileset>
|
||||
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
|
||||
<include name="org/broadinstitute/sting/queue/**/*.R"/>
|
||||
</fileset>
|
||||
<manifest>
|
||||
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
|
||||
</manifest>
|
||||
|
|
@ -680,20 +707,7 @@
|
|||
</jar>
|
||||
|
||||
</target>
|
||||
<!--
|
||||
<target name="gatk.oneoffs.manifests" depends="gatk.oneoffs.jar, init.manifests" if="include.oneoffs">
|
||||
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" update="true" whenmanifestonly="skip">
|
||||
<manifest>
|
||||
<attribute name="Class-Path" value="${jar.classpath}" />
|
||||
</manifest>
|
||||
</jar>
|
||||
<jar jarfile="${dist.dir}/SliceBams.jar" update="true" whenmanifestonly="skip">
|
||||
<manifest>
|
||||
<attribute name="Class-Path" value="${jar.classpath}" />
|
||||
</manifest>
|
||||
</jar>
|
||||
</target>
|
||||
-->
|
||||
|
||||
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
|
||||
<jar jarfile="${dist.dir}/Queue.jar" update="true" >
|
||||
<manifest>
|
||||
|
|
@ -778,10 +792,6 @@
|
|||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
<compilerarg value="-proc:none"/>
|
||||
<!--
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
-->
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
|
|
@ -798,10 +808,6 @@
|
|||
<pathelement location="${testng.jar}"/>
|
||||
</classpath>
|
||||
<compilerarg value="-proc:none"/>
|
||||
<!--
|
||||
<compilerarg value="-Acom.google.java.contract.debug"/>
|
||||
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
|
||||
-->
|
||||
</javac>
|
||||
</target>
|
||||
|
||||
|
|
@ -849,6 +855,9 @@
|
|||
<pathelement location="${java.private.test.classes}" />
|
||||
<pathelement location="${scala.public.test.classes}" />
|
||||
<pathelement location="${scala.private.test.classes}" />
|
||||
<pathelement location="${R.tar.dir}" />
|
||||
<pathelement location="${R.public.scripts.dir}" />
|
||||
<pathelement location="${R.private.scripts.dir}" />
|
||||
</path>
|
||||
|
||||
<path id="testng.gatk.releasetest.classpath">
|
||||
|
|
@ -1185,19 +1194,18 @@
|
|||
</target>
|
||||
|
||||
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
|
||||
<delete dir="out"/>
|
||||
<delete dir="${build.dir}"/>
|
||||
<delete dir="${lib.dir}"/>
|
||||
<delete dir="dump"/>
|
||||
<delete dir="${contract.dump.dir}"/>
|
||||
<delete dir="${staging.dir}"/>
|
||||
<delete dir="${dist.dir}"/>
|
||||
<delete dir="pipelinetests"/>
|
||||
<delete dir="${pipelinetest.dir}"/>
|
||||
</target>
|
||||
|
||||
<!-- Build gsalib R module -->
|
||||
<target name="gsalib">
|
||||
<exec executable="R" failonerror="true">
|
||||
<arg line="R CMD INSTALL -l public/R/ public/R/src/gsalib/" />
|
||||
<arg line="R CMD INSTALL -l ${R.library.dir} ${R.public.src.dir}/${R.package.path}/gsalib" />
|
||||
</exec>
|
||||
</target>
|
||||
</project>
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
library(tools)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
|
|
@ -47,6 +49,9 @@ if( is.numeric(c$Covariate) ) {
|
|||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot mean quality versus the covariate
|
||||
|
|
@ -69,6 +74,10 @@ if( is.numeric(c$Covariate) ) {
|
|||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot histogram of the covariate
|
||||
#
|
||||
|
|
@ -106,3 +115,7 @@ if( is.numeric(c$Covariate) ) {
|
|||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
}
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
library(tools)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
|
||||
input = args[1]
|
||||
|
|
@ -33,6 +35,10 @@ points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
|
|||
abline(0,1, lty=2)
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot Q empirical histogram
|
||||
#
|
||||
|
|
@ -52,6 +58,10 @@ points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
|||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
||||
#
|
||||
# Plot Q reported histogram
|
||||
#
|
||||
|
|
@ -68,3 +78,7 @@ plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yM
|
|||
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
|
||||
axis(2,axTicks(2), format(axTicks(2), scientific=F))
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
#!/bin/env Rscript
|
||||
|
||||
library(tools)
|
||||
|
||||
args <- commandArgs(TRUE)
|
||||
verbose = TRUE
|
||||
|
||||
|
|
@ -85,3 +87,7 @@ if ( ! is.null(sensitivity) ) {
|
|||
}
|
||||
|
||||
dev.off()
|
||||
|
||||
if (exists('compactPDF')) {
|
||||
compactPDF(outfile)
|
||||
}
|
||||
|
|
@ -12,20 +12,20 @@ if ( onCMDLine ) {
|
|||
inputFileName = args[1]
|
||||
outputPDF = args[2]
|
||||
} else {
|
||||
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
|
||||
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
inputFileName = "~/Desktop/broadLocal/GATK/unstable/wgs.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
|
||||
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
|
||||
outputPDF = NA
|
||||
}
|
||||
|
||||
RUNTIME_UNITS = "(sec)"
|
||||
ORIGINAL_UNITS_TO_SECONDS = 1/1000
|
||||
RUNTIME_UNITS = "(hours)"
|
||||
ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
|
||||
|
||||
#
|
||||
# Helper function to aggregate all of the jobs in the report across all tables
|
||||
#
|
||||
allJobsFromReport <- function(report) {
|
||||
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
|
||||
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime")
|
||||
sub <- lapply(report, function(table) table[,names])
|
||||
do.call("rbind", sub)
|
||||
}
|
||||
|
|
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
|
|||
#
|
||||
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
|
||||
#
|
||||
plotJobsGantt <- function(gatkReport, sortOverall) {
|
||||
plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
|
||||
allJobs = allJobsFromReport(gatkReport)
|
||||
if ( sortOverall ) {
|
||||
title = "All jobs, by analysis, by start time"
|
||||
|
|
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
|
|||
}
|
||||
allJobs$index = 1:nrow(allJobs)
|
||||
minTime = min(allJobs$startTime)
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relStartTime = allJobs$startTime - minTime
|
||||
allJobs$relDoneTime = allJobs$doneTime - minTime
|
||||
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
|
||||
maxRelTime = max(allJobs$relDoneTime)
|
||||
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + theme_bw()
|
||||
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
|
||||
if ( includeText )
|
||||
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
|
||||
p <- p + xlim(0, maxRelTime * 1.1)
|
||||
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job")
|
||||
p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS))
|
||||
p <- p + ylab("Job number")
|
||||
p <- p + opts(title=title)
|
||||
print(p)
|
||||
}
|
||||
|
|
@ -119,7 +121,7 @@ plotGroup <- function(groupTable) {
|
|||
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
|
||||
# todo -- how do we group by annotations?
|
||||
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
|
||||
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
|
||||
p <- p + xlab(paste("runtime", RUNTIME_UNITS)) + ylab("No. of jobs")
|
||||
p <- p + opts(title=paste("Job runtime histogram for", name))
|
||||
print(p)
|
||||
}
|
||||
|
|
@ -139,9 +141,9 @@ print(paste("Project :", inputFileName))
|
|||
|
||||
convertUnits <- function(gatkReportData) {
|
||||
convertGroup <- function(g) {
|
||||
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
|
||||
g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
|
||||
g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
|
||||
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
|
||||
g
|
||||
}
|
||||
lapply(gatkReportData, convertGroup)
|
||||
|
|
@ -157,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
|
|||
pdf(outputPDF, height=8.5, width=11)
|
||||
}
|
||||
|
||||
plotJobsGantt(gatkReportData, T)
|
||||
plotJobsGantt(gatkReportData, F)
|
||||
plotJobsGantt(gatkReportData, T, F)
|
||||
plotJobsGantt(gatkReportData, F, F)
|
||||
plotProgressByTime(gatkReportData)
|
||||
for ( group in gatkReportData ) {
|
||||
plotGroup(group)
|
||||
|
|
@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
|
|||
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
|
||||
}
|
||||
|
||||
gatkreport = as.list(tableEnv);
|
||||
gatkreport = as.list(tableEnv, all.names=TRUE);
|
||||
}
|
||||
|
Before Width: | Height: | Size: 49 KiB After Width: | Height: | Size: 49 KiB |
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
|
|
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
|
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of reads aligned by this map (aka 1).
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
//logger.info(String.format("examining read %s", read.getReadName()));
|
||||
|
||||
byte[] bases = read.getReadBases();
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.WalkerName;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
|
|
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
SAMRecord alignedRead = aligner.align(read,header);
|
||||
out.addAlignment(alignedRead);
|
||||
return 1;
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
|
||||
package org.broadinstitute.sting.alignment;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
|
||||
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
|
||||
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
|
||||
|
|
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
|
|||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.util.Iterator;
|
||||
|
|
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
|
|||
|
||||
/**
|
||||
* Aligns a read to the given reference.
|
||||
*
|
||||
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
|
||||
* @param read Read to align.
|
||||
* @return Number of alignments found for this read.
|
||||
*/
|
||||
@Override
|
||||
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
|
||||
if(alignmentIterator.hasNext()) {
|
||||
int numAlignments = alignmentIterator.next().length;
|
||||
|
|
|
|||
|
|
@ -25,6 +25,9 @@
|
|||
|
||||
package org.broadinstitute.sting.analyzecovariates;
|
||||
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
|
|
@ -33,14 +36,16 @@ import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
|
|||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
|
||||
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
|
||||
import org.broadinstitute.sting.utils.R.RScriptExecutor;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.io.Resource;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Pattern;
|
||||
|
|
@ -71,15 +76,13 @@ import java.util.regex.Pattern;
|
|||
* </ul>
|
||||
*
|
||||
* <p>
|
||||
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
|
||||
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
|
||||
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
|
||||
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
|
||||
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
|
||||
* NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
|
||||
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
|
||||
*
|
||||
* <p>
|
||||
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
|
||||
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
|
||||
* <a target="gatkwiki" href="http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration"
|
||||
* >http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration</a>
|
||||
*
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
|
|
@ -91,7 +94,6 @@ import java.util.regex.Pattern;
|
|||
* java -Xmx4g -jar AnalyzeCovariates.jar \
|
||||
* -recalFile /path/to/recal.table.csv \
|
||||
* -outputDir /path/to/output_dir/ \
|
||||
* -resources resources/ \
|
||||
* -ignoreQ 5
|
||||
* </pre>
|
||||
*
|
||||
|
|
@ -101,6 +103,11 @@ import java.util.regex.Pattern;
|
|||
groupName = "AnalyzeCovariates",
|
||||
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
|
||||
public class AnalyzeCovariates extends CommandLineProgram {
|
||||
final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class);
|
||||
|
||||
private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R";
|
||||
private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R";
|
||||
private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R";
|
||||
|
||||
/////////////////////////////
|
||||
// Command Line Arguments
|
||||
|
|
@ -114,11 +121,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
|
||||
private String RECAL_FILE = "output.recal_data.csv";
|
||||
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
|
||||
private String OUTPUT_DIR = "analyzeCovariates/";
|
||||
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
|
||||
private String PATH_TO_RSCRIPT = "Rscript";
|
||||
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
|
||||
private String PATH_TO_RESOURCES = "public/R/";
|
||||
private File OUTPUT_DIR = new File("analyzeCovariates");
|
||||
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
|
||||
private int IGNORE_QSCORES_LESS_THAN = 5;
|
||||
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
|
||||
|
|
@ -154,29 +157,26 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
protected int execute() {
|
||||
|
||||
// create the output directory where all the data tables and plots will go
|
||||
try {
|
||||
Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR);
|
||||
} catch (IOException e) {
|
||||
System.out.println("Couldn't create directory: " + OUTPUT_DIR);
|
||||
System.out.println("User is responsible for making sure the output directory exists.");
|
||||
}
|
||||
if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
|
||||
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
|
||||
if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs())
|
||||
throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to create output directory: " + OUTPUT_DIR);
|
||||
|
||||
if (!RScriptExecutor.RSCRIPT_EXISTS)
|
||||
Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated.");
|
||||
|
||||
// initialize all the data from the csv file and allocate the list of covariates
|
||||
System.out.println("Reading in input csv file...");
|
||||
logger.info("Reading in input csv file...");
|
||||
initializeData();
|
||||
System.out.println("...Done!");
|
||||
logger.info("...Done!");
|
||||
|
||||
// output data tables for Rscript to read in
|
||||
System.out.println("Writing out intermediate tables for R...");
|
||||
logger.info("Writing out intermediate tables for R...");
|
||||
writeDataTables();
|
||||
System.out.println("...Done!");
|
||||
logger.info("...Done!");
|
||||
|
||||
// perform the analysis using Rscript and output the plots
|
||||
System.out.println("Calling analysis R scripts and writing out figures...");
|
||||
logger.info("Calling analysis R scripts and writing out figures...");
|
||||
callRScripts();
|
||||
System.out.println("...Done!");
|
||||
logger.info("...Done!");
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -287,37 +287,40 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
|
||||
String readGroup = readGroupKey.toString();
|
||||
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
|
||||
System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" );
|
||||
System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
|
||||
logger.info(String.format(
|
||||
"Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
|
||||
readGroup, readGroupDatum.getNumObservations(),
|
||||
readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
|
||||
|
||||
// for each covariate
|
||||
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
|
||||
Covariate cov = requestedCovariates.get(iii);
|
||||
|
||||
// Create a PrintStream
|
||||
PrintStream output = null;
|
||||
File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
|
||||
PrintStream output;
|
||||
try {
|
||||
output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"));
|
||||
|
||||
} catch (FileNotFoundException e) {
|
||||
System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
|
||||
System.exit(-1);
|
||||
output = new PrintStream(FileUtils.openOutputStream(outputFile));
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
|
||||
}
|
||||
|
||||
// Output the header
|
||||
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
|
||||
try {
|
||||
// Output the header
|
||||
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
|
||||
|
||||
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
|
||||
output.print( covariateKey.toString() + "\t" ); // Covariate
|
||||
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
|
||||
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
|
||||
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
|
||||
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
|
||||
output.println( thisDatum.getNumObservations() ); // nBases
|
||||
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
|
||||
output.print( covariateKey.toString() + "\t" ); // Covariate
|
||||
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
|
||||
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
|
||||
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
|
||||
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
|
||||
output.println( thisDatum.getNumObservations() ); // nBases
|
||||
}
|
||||
} finally {
|
||||
// Close the PrintStream
|
||||
IOUtils.closeQuietly(output);
|
||||
}
|
||||
|
||||
// Close the PrintStream
|
||||
output.close();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
|
|
@ -327,10 +330,6 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
}
|
||||
|
||||
private void callRScripts() {
|
||||
RScriptExecutor.RScriptArgumentCollection argumentCollection =
|
||||
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
|
||||
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
|
||||
|
||||
int numReadGroups = 0;
|
||||
|
||||
// for each read group
|
||||
|
|
@ -338,23 +337,32 @@ public class AnalyzeCovariates extends CommandLineProgram {
|
|||
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
|
||||
|
||||
String readGroup = readGroupKey.toString();
|
||||
System.out.println("Analyzing read group: " + readGroup);
|
||||
logger.info("Analyzing read group: " + readGroup);
|
||||
|
||||
// for each covariate
|
||||
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
|
||||
Covariate cov = requestedCovariates.get(iii);
|
||||
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
|
||||
final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
|
||||
if (DO_INDEL_QUALITY) {
|
||||
executor.callRScripts("plot_indelQuality.R", outputFilename,
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class));
|
||||
// The second argument is the name of the covariate in order to make the plots look nice
|
||||
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
|
||||
executor.exec();
|
||||
} else {
|
||||
if( iii == 1 ) {
|
||||
// Analyze reported quality
|
||||
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
|
||||
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class));
|
||||
// The second argument is the Q scores that should be turned pink in the plot because they were ignored
|
||||
executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE);
|
||||
executor.exec();
|
||||
} else { // Analyze all other covariates
|
||||
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
|
||||
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
|
||||
RScriptExecutor executor = new RScriptExecutor();
|
||||
executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class));
|
||||
// The second argument is the name of the covariate in order to make the plots look nice
|
||||
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
|
||||
executor.exec();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
/**
|
||||
* Maps indices of command line arguments to values paired with that argument.
|
||||
*/
|
||||
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
|
||||
public final SortedMap<ArgumentMatchSite,List<String>> sites = new TreeMap<ArgumentMatchSite,List<String>>();
|
||||
|
||||
/**
|
||||
* An ordered, freeform collection of tags.
|
||||
|
|
@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
}
|
||||
|
||||
/**
|
||||
* A simple way of indicating that an argument with the given label and definition exists at this index.
|
||||
* A simple way of indicating that an argument with the given label and definition exists at this site.
|
||||
* @param label Label of the argument match. Must not be null.
|
||||
* @param definition The associated definition, if one exists. May be null.
|
||||
* @param index Position of the argument. Must not be null.
|
||||
* @param site Position of the argument. Must not be null.
|
||||
* @param tags ordered freeform text tags associated with this argument.
|
||||
*/
|
||||
public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) {
|
||||
this( label, definition, index, null, tags );
|
||||
public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) {
|
||||
this( label, definition, site, null, tags );
|
||||
}
|
||||
|
||||
/**
|
||||
* A simple way of indicating that an argument with the given label and definition exists at this index.
|
||||
* A simple way of indicating that an argument with the given label and definition exists at this site.
|
||||
* @param label Label of the argument match. Must not be null.
|
||||
* @param definition The associated definition, if one exists. May be null.
|
||||
* @param index Position of the argument. Must not be null.
|
||||
* @param site Position of the argument. Must not be null.
|
||||
* @param value Value for the argument at this position.
|
||||
* @param tags ordered freeform text tags associated with this argument.
|
||||
*/
|
||||
private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) {
|
||||
private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) {
|
||||
this.label = label;
|
||||
this.definition = definition;
|
||||
|
||||
ArrayList<String> values = new ArrayList<String>();
|
||||
if( value != null )
|
||||
values.add(value);
|
||||
indices.put(index,values );
|
||||
sites.put(site,values );
|
||||
|
||||
this.tags = tags;
|
||||
}
|
||||
|
|
@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
ArgumentMatch otherArgumentMatch = (ArgumentMatch)other;
|
||||
return this.definition.equals(otherArgumentMatch.definition) &&
|
||||
this.label.equals(otherArgumentMatch.label) &&
|
||||
this.indices.equals(otherArgumentMatch.indices) &&
|
||||
this.sites.equals(otherArgumentMatch.sites) &&
|
||||
this.tags.equals(otherArgumentMatch.tags);
|
||||
}
|
||||
|
||||
|
|
@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
* @param key Key which specifies the transform.
|
||||
* @return A variant of this ArgumentMatch with all keys transformed.
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
ArgumentMatch transform(Multiplexer multiplexer, Object key) {
|
||||
SortedMap<Integer,List<String>> newIndices = new TreeMap<Integer,List<String>>();
|
||||
for(Map.Entry<Integer,List<String>> index: indices.entrySet()) {
|
||||
SortedMap<ArgumentMatchSite,List<String>> newIndices = new TreeMap<ArgumentMatchSite,List<String>>();
|
||||
for(Map.Entry<ArgumentMatchSite,List<String>> site: sites.entrySet()) {
|
||||
List<String> newEntries = new ArrayList<String>();
|
||||
for(String entry: index.getValue())
|
||||
for(String entry: site.getValue())
|
||||
newEntries.add(multiplexer.transformArgument(key,entry));
|
||||
newIndices.put(index.getKey(),newEntries);
|
||||
newIndices.put(site.getKey(),newEntries);
|
||||
}
|
||||
ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition);
|
||||
newArgumentMatch.indices.putAll(newIndices);
|
||||
newArgumentMatch.sites.putAll(newIndices);
|
||||
return newArgumentMatch;
|
||||
}
|
||||
|
||||
|
|
@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
public Iterator<ArgumentMatch> iterator() {
|
||||
return new Iterator<ArgumentMatch>() {
|
||||
/**
|
||||
* Iterate over each the available index.
|
||||
* Iterate over each the available site.
|
||||
*/
|
||||
private Iterator<Integer> indexIterator = null;
|
||||
private Iterator<ArgumentMatchSite> siteIterator = null;
|
||||
|
||||
/**
|
||||
* Iterate over each available token.
|
||||
|
|
@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
private Iterator<String> tokenIterator = null;
|
||||
|
||||
/**
|
||||
* The next index to return. Null if none remain.
|
||||
* The next site to return. Null if none remain.
|
||||
*/
|
||||
Integer nextIndex = null;
|
||||
ArgumentMatchSite nextSite = null;
|
||||
|
||||
/**
|
||||
* The next token to return. Null if none remain.
|
||||
|
|
@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
String nextToken = null;
|
||||
|
||||
{
|
||||
indexIterator = indices.keySet().iterator();
|
||||
siteIterator = sites.keySet().iterator();
|
||||
prepareNext();
|
||||
}
|
||||
|
||||
|
|
@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
* @return True if there's another token waiting in the wings. False otherwise.
|
||||
*/
|
||||
public boolean hasNext() {
|
||||
return nextToken != null;
|
||||
return nextToken != null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
* @return The next ArgumentMatch in the series. Should never be null.
|
||||
*/
|
||||
public ArgumentMatch next() {
|
||||
if( nextIndex == null || nextToken == null )
|
||||
if( nextSite == null || nextToken == null )
|
||||
throw new IllegalStateException( "No more ArgumentMatches are available" );
|
||||
|
||||
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags );
|
||||
ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags );
|
||||
prepareNext();
|
||||
return match;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
|
||||
* initialize nextIndex / nextToken to null.
|
||||
* initialize nextSite / nextToken to null.
|
||||
*/
|
||||
private void prepareNext() {
|
||||
if( tokenIterator != null && tokenIterator.hasNext() ) {
|
||||
nextToken = tokenIterator.next();
|
||||
}
|
||||
else {
|
||||
nextIndex = null;
|
||||
nextSite = null;
|
||||
nextToken = null;
|
||||
|
||||
// Do a nested loop. While more data is present in the inner loop, grab that data.
|
||||
// Otherwise, troll the outer iterator looking for more data.
|
||||
while( indexIterator.hasNext() ) {
|
||||
nextIndex = indexIterator.next();
|
||||
if( indices.get(nextIndex) != null ) {
|
||||
tokenIterator = indices.get(nextIndex).iterator();
|
||||
while( siteIterator.hasNext() ) {
|
||||
nextSite = siteIterator.next();
|
||||
if( sites.get(nextSite) != null ) {
|
||||
tokenIterator = sites.get(nextSite).iterator();
|
||||
if( tokenIterator.hasNext() ) {
|
||||
nextToken = tokenIterator.next();
|
||||
break;
|
||||
|
|
@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
* @param other The other match to merge into.
|
||||
*/
|
||||
public void mergeInto( ArgumentMatch other ) {
|
||||
indices.putAll(other.indices);
|
||||
sites.putAll(other.sites);
|
||||
}
|
||||
|
||||
/**
|
||||
* Associate a value with this merge maapping.
|
||||
* @param index index of the command-line argument to which this value is mated.
|
||||
* @param site site of the command-line argument to which this value is mated.
|
||||
* @param value Text representation of value to add.
|
||||
*/
|
||||
public void addValue( int index, String value ) {
|
||||
if( !indices.containsKey(index) || indices.get(index) == null )
|
||||
indices.put(index, new ArrayList<String>() );
|
||||
indices.get(index).add(value);
|
||||
public void addValue( ArgumentMatchSite site, String value ) {
|
||||
if( !sites.containsKey(site) || sites.get(site) == null )
|
||||
sites.put(site, new ArrayList<String>() );
|
||||
sites.get(site).add(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Does this argument already have a value at the given site?
|
||||
* Arguments are only allowed to be single-valued per site, and
|
||||
* flags aren't allowed a value at all.
|
||||
* @param index Index at which to check for values.
|
||||
* @param site Site at which to check for values.
|
||||
* @return True if the argument has a value at the given site. False otherwise.
|
||||
*/
|
||||
public boolean hasValueAtSite( int index ) {
|
||||
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
|
||||
public boolean hasValueAtSite( ArgumentMatchSite site ) {
|
||||
return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
|
|||
*/
|
||||
public List<String> values() {
|
||||
List<String> values = new ArrayList<String>();
|
||||
for( int index: indices.keySet() ) {
|
||||
if( indices.get(index) != null )
|
||||
values.addAll(indices.get(index));
|
||||
for( ArgumentMatchSite site: sites.keySet() ) {
|
||||
if( sites.get(site) != null )
|
||||
values.addAll(sites.get(site));
|
||||
}
|
||||
return values;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,76 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
/**
|
||||
* Which source and the index within the source where an argument match was found.
|
||||
*/
|
||||
public class ArgumentMatchSite implements Comparable<ArgumentMatchSite> {
|
||||
private final ArgumentMatchSource source;
|
||||
private final int index;
|
||||
|
||||
public ArgumentMatchSite(ArgumentMatchSource source, int index) {
|
||||
this.source = source;
|
||||
this.index = index;
|
||||
}
|
||||
|
||||
public ArgumentMatchSource getSource() {
|
||||
return source;
|
||||
}
|
||||
|
||||
public int getIndex() {
|
||||
return index;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
ArgumentMatchSite that = (ArgumentMatchSite) o;
|
||||
|
||||
return (index == that.index) && (source == null ? that.source == null : source.equals(that.source));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = source != null ? source.hashCode() : 0;
|
||||
// Generated by intellij. No other special reason to this implementation. -ks
|
||||
result = 31 * result + index;
|
||||
return result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(ArgumentMatchSite that) {
|
||||
int comp = this.source.compareTo(that.source);
|
||||
if (comp != 0)
|
||||
return comp;
|
||||
|
||||
// Both files are the same.
|
||||
if (this.index == that.index)
|
||||
return 0;
|
||||
return this.index < that.index ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,98 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
/**
|
||||
* Where an argument match originated, via the commandline or a file.
|
||||
*/
|
||||
public class ArgumentMatchSource implements Comparable<ArgumentMatchSource> {
|
||||
public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null);
|
||||
|
||||
private final ArgumentMatchSourceType type;
|
||||
private final File file;
|
||||
|
||||
/**
|
||||
* Creates an argument match source from the specified file.
|
||||
* @param file File specifying the arguments. Must not be null.
|
||||
*/
|
||||
public ArgumentMatchSource(File file) {
|
||||
this(ArgumentMatchSourceType.File, file);
|
||||
}
|
||||
|
||||
private ArgumentMatchSource(ArgumentMatchSourceType type, File file) {
|
||||
if (type == ArgumentMatchSourceType.File && file == null)
|
||||
throw new IllegalArgumentException("An argument match source of type File cannot have a null file.");
|
||||
this.type = type;
|
||||
this.file = file;
|
||||
}
|
||||
|
||||
public ArgumentMatchSourceType getType() {
|
||||
return type;
|
||||
}
|
||||
|
||||
public File getFile() {
|
||||
return file;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
ArgumentMatchSource that = (ArgumentMatchSource) o;
|
||||
|
||||
return (type == that.type) && (file == null ? that.file == null : file.equals(that.file));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
int result = type != null ? type.hashCode() : 0;
|
||||
result = 31 * result + (file != null ? file.hashCode() : 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compares two sources, putting the command line first, then files.
|
||||
*/
|
||||
@Override
|
||||
public int compareTo(ArgumentMatchSource that) {
|
||||
int comp = this.type.compareTo(that.type);
|
||||
if (comp != 0)
|
||||
return comp;
|
||||
|
||||
File f1 = this.file;
|
||||
File f2 = that.file;
|
||||
|
||||
if ((f1 == null) ^ (f2 == null)) {
|
||||
// If one of the files is null and the other is not
|
||||
// put the null file first
|
||||
return f1 == null ? -1 : 1;
|
||||
}
|
||||
|
||||
return f1 == null ? 0 : f1.compareTo(f2);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
/**
|
||||
* Type of where an argument match originated, via the commandline or a file.
|
||||
*/
|
||||
public enum ArgumentMatchSourceType {
|
||||
CommandLine, File
|
||||
}
|
||||
|
|
@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
* Collection matches from argument definition to argument value.
|
||||
* Package protected access is deliberate.
|
||||
*/
|
||||
Map<Integer,ArgumentMatch> argumentMatches = new TreeMap<Integer,ArgumentMatch>();
|
||||
Map<ArgumentMatchSite,ArgumentMatch> argumentMatches = new TreeMap<ArgumentMatchSite,ArgumentMatch>();
|
||||
|
||||
/**
|
||||
* Provide a place to put command-line argument values that don't seem to belong to
|
||||
|
|
@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
* @param site Site at which to check.
|
||||
* @return True if the site has a match. False otherwise.
|
||||
*/
|
||||
boolean hasMatch( int site ) {
|
||||
boolean hasMatch( ArgumentMatchSite site ) {
|
||||
return argumentMatches.containsKey( site );
|
||||
}
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
* @return The match present at the given site.
|
||||
* @throws IllegalArgumentException if site does not contain a match.
|
||||
*/
|
||||
ArgumentMatch getMatch( int site ) {
|
||||
ArgumentMatch getMatch( ArgumentMatchSite site ) {
|
||||
if( !argumentMatches.containsKey(site) )
|
||||
throw new IllegalArgumentException( "Site does not contain an argument: " + site );
|
||||
return argumentMatches.get(site);
|
||||
|
|
@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
|
||||
/**
|
||||
* Return all argument matches of this source.
|
||||
* @param parsingEngine Parsing engine.
|
||||
* @param argumentSource Argument source to match.
|
||||
* @return List of all matches.
|
||||
*/
|
||||
|
|
@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
* TODO: Generify this.
|
||||
* @param multiplexer Multiplexer that controls the transformation process.
|
||||
* @param key Key which specifies the transform.
|
||||
* @return new argument matches.
|
||||
*/
|
||||
ArgumentMatches transform(Multiplexer multiplexer, Object key) {
|
||||
ArgumentMatches newArgumentMatches = new ArgumentMatches();
|
||||
|
|
@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
|
|||
for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
|
||||
if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
|
||||
argumentMatch.mergeInto( match );
|
||||
for( int index: match.indices.keySet() )
|
||||
argumentMatches.put( index, argumentMatch );
|
||||
for( ArgumentMatchSite site: match.sites.keySet() )
|
||||
argumentMatches.put( site, argumentMatch );
|
||||
definitionExists = true;
|
||||
}
|
||||
}
|
||||
|
||||
if( !definitionExists ) {
|
||||
for( int index: match.indices.keySet() )
|
||||
argumentMatches.put( index, match );
|
||||
for( ArgumentMatchSite site: match.sites.keySet() )
|
||||
argumentMatches.put( site, match );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return parse(parsingEngine, source, type, matches, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* The actual argument parsing method.
|
||||
*
|
||||
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
|
||||
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
|
||||
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
|
||||
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
|
||||
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
|
||||
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
|
||||
* continue to input them out of order (whereas Tribble Features are ordered).
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
|
||||
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
|
||||
*/
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
|
||||
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
|
||||
String value = getArgumentValue( defaultDefinition, matches );
|
||||
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
|
||||
|
|
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
if ( tags.getPositionalTags().size() > 2 ) {
|
||||
throw new UserException.CommandLineException(
|
||||
String.format("Unexpected number of positional tags for argument %s : %s. " +
|
||||
"Rod bindings only suport -X:type and -X:name,type argument styles",
|
||||
"Rod bindings only support -X:type and -X:name,type argument styles",
|
||||
value, source.field.getName()));
|
||||
} if ( tags.getPositionalTags().size() == 2 ) {
|
||||
// -X:name,type style
|
||||
|
|
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
if ( tribbleType == null )
|
||||
if ( tribbleType == null ) {
|
||||
// IntervalBindings allow streaming conversion of Strings
|
||||
if ( createIntervalBinding ) {
|
||||
return new IntervalBinding(value);
|
||||
}
|
||||
|
||||
if ( ! file.exists() ) {
|
||||
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
|
||||
} else if ( ! file.canRead() || ! file.isFile() ) {
|
||||
|
|
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
|
||||
manager.userFriendlyListOfAvailableFeatures(parameterType)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
|
||||
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(result,tags);
|
||||
parsingEngine.addRodBinding(result);
|
||||
Object result;
|
||||
if ( createIntervalBinding ) {
|
||||
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
} else {
|
||||
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
|
||||
parsingEngine.addTags(rbind, tags);
|
||||
parsingEngine.addRodBinding(rbind);
|
||||
result = rbind;
|
||||
}
|
||||
return result;
|
||||
} catch (InvocationTargetException e) {
|
||||
throw new UserException.CommandLineException(
|
||||
|
|
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parser for RodBinding objects
|
||||
*/
|
||||
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
/**
|
||||
* We only want IntervalBinding class objects
|
||||
* @param type The type to check.
|
||||
* @return true if the provided class is an IntervalBinding.class
|
||||
*/
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
return isIntervalBinding(type);
|
||||
}
|
||||
|
||||
public static boolean isIntervalBinding( Class type ) {
|
||||
return IntervalBinding.class.isAssignableFrom(type);
|
||||
}
|
||||
|
||||
/**
|
||||
* See note from RodBindingArgumentTypeDescriptor.parse().
|
||||
*
|
||||
* @param parsingEngine parsing engine
|
||||
* @param source source
|
||||
* @param type type to check
|
||||
* @param matches matches
|
||||
* @return the IntervalBinding object.
|
||||
*/
|
||||
@Override
|
||||
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
|
||||
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse simple argument types: java primitives, wrapper classes, and anything that has
|
||||
* a simple String constructor.
|
||||
|
|
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
|||
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
|
||||
@Override
|
||||
public boolean supports( Class type ) {
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
|
||||
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
|
||||
if ( type.isPrimitive() ) return true;
|
||||
if ( type.isEnum() ) return true;
|
||||
if ( primitiveToWrapperMap.containsValue(type) ) return true;
|
||||
|
|
|
|||
|
|
@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
|||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.Locale;
|
||||
import java.util.*;
|
||||
|
||||
public abstract class CommandLineProgram {
|
||||
|
||||
|
|
@ -155,6 +152,7 @@ public abstract class CommandLineProgram {
|
|||
*
|
||||
* @param clp the command line program to execute
|
||||
* @param args the command line arguments passed in
|
||||
* @param dryRun dry run
|
||||
* @throws Exception when an exception occurs
|
||||
*/
|
||||
@SuppressWarnings("unchecked")
|
||||
|
|
@ -176,6 +174,8 @@ public abstract class CommandLineProgram {
|
|||
ParsingEngine parser = clp.parser = new ParsingEngine(clp);
|
||||
parser.addArgumentSource(clp.getClass());
|
||||
|
||||
Map<ArgumentMatchSource, List<String>> parsedArgs;
|
||||
|
||||
// process the args
|
||||
if (clp.canAddArgumentsDynamically()) {
|
||||
// if the command-line program can toss in extra args, fetch them and reparse the arguments.
|
||||
|
|
@ -196,14 +196,14 @@ public abstract class CommandLineProgram {
|
|||
Class[] argumentSources = clp.getArgumentSources();
|
||||
for (Class argumentSource : argumentSources)
|
||||
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
|
||||
parser.parse(args);
|
||||
parsedArgs = parser.parse(args);
|
||||
|
||||
if (isHelpPresent(parser))
|
||||
printHelpAndExit(clp, parser);
|
||||
|
||||
if ( ! dryRun ) parser.validate();
|
||||
} else {
|
||||
parser.parse(args);
|
||||
parsedArgs = parser.parse(args);
|
||||
|
||||
if ( ! dryRun ) {
|
||||
if (isHelpPresent(parser))
|
||||
|
|
@ -230,7 +230,7 @@ public abstract class CommandLineProgram {
|
|||
}
|
||||
|
||||
// regardless of what happens next, generate the header information
|
||||
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
|
||||
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs);
|
||||
|
||||
// call the execute
|
||||
CommandLineProgram.result = clp.execute();
|
||||
|
|
|
|||
|
|
@ -0,0 +1,108 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
|
||||
*
|
||||
* The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
|
||||
* the engine to construct intervals for traversal at runtime. The IntervalBinding can
|
||||
* either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
|
||||
* The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
|
||||
*
|
||||
* Note that this class is immutable.
|
||||
*/
|
||||
public final class IntervalBinding<T extends Feature> {
|
||||
|
||||
private RodBinding<T> featureIntervals;
|
||||
private String stringIntervals;
|
||||
|
||||
@Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
|
||||
public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
|
||||
featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
|
||||
}
|
||||
|
||||
@Requires({"intervalArgument != null"})
|
||||
public IntervalBinding(String intervalArgument) {
|
||||
stringIntervals = intervalArgument;
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
if ( featureIntervals != null )
|
||||
return featureIntervals.getSource();
|
||||
return stringIntervals;
|
||||
}
|
||||
|
||||
public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
|
||||
List<GenomeLoc> intervals;
|
||||
|
||||
if ( featureIntervals != null ) {
|
||||
intervals = new ArrayList<GenomeLoc>();
|
||||
|
||||
//RMDTrackBuilder builder = new RMDTrackBuilder(toolkit.getReferenceDataSource().getReference().getSequenceDictionary(),
|
||||
// toolkit.getGenomeLocParser(),
|
||||
// toolkit.getArguments().unsafe);
|
||||
|
||||
// TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
|
||||
|
||||
FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
|
||||
if ( codec instanceof ReferenceDependentFeatureCodec )
|
||||
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());
|
||||
try {
|
||||
FileInputStream fis = new FileInputStream(new File(featureIntervals.getSource()));
|
||||
AsciiLineReader lineReader = new AsciiLineReader(fis);
|
||||
codec.readHeader(lineReader);
|
||||
String line = lineReader.readLine();
|
||||
while ( line != null ) {
|
||||
intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(codec.decodeLoc(line)));
|
||||
line = lineReader.readLine();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UserException("Problem reading the interval file " + featureIntervals.getSource() + "; " + e.getMessage());
|
||||
}
|
||||
|
||||
} else {
|
||||
intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
|
||||
}
|
||||
|
||||
return intervals;
|
||||
}
|
||||
}
|
||||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.commandline;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.classloader.JVMUtils;
|
||||
|
|
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import org.broadinstitute.sting.utils.help.ApplicationDetails;
|
||||
import org.broadinstitute.sting.utils.help.HelpFormatter;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.lang.reflect.Field;
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -75,6 +78,7 @@ public class ParsingEngine {
|
|||
* The type of set used must be ordered (but not necessarily sorted).
|
||||
*/
|
||||
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
|
||||
new IntervalBindingArgumentTypeDescriptor(),
|
||||
new RodBindingArgumentTypeDescriptor(),
|
||||
new CompoundArgumentTypeDescriptor(),
|
||||
new MultiplexArgumentTypeDescriptor()) );
|
||||
|
|
@ -100,6 +104,8 @@ public class ParsingEngine {
|
|||
if(clp != null)
|
||||
argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors());
|
||||
argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS);
|
||||
|
||||
addArgumentSource(ParsingEngineArgumentFiles.class);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -148,21 +154,43 @@ public class ParsingEngine {
|
|||
* command-line arguments to the arguments that are actually
|
||||
* required.
|
||||
* @param tokens Tokens passed on the command line.
|
||||
* @return The parsed arguments by file.
|
||||
*/
|
||||
public void parse( String[] tokens ) {
|
||||
public SortedMap<ArgumentMatchSource, List<String>> parse( String[] tokens ) {
|
||||
argumentMatches = new ArgumentMatches();
|
||||
SortedMap<ArgumentMatchSource, List<String>> parsedArgs = new TreeMap<ArgumentMatchSource, List<String>>();
|
||||
|
||||
int lastArgumentMatchSite = -1;
|
||||
List<String> cmdLineTokens = Arrays.asList(tokens);
|
||||
parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs);
|
||||
|
||||
for( int i = 0; i < tokens.length; i++ ) {
|
||||
String token = tokens[i];
|
||||
ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles();
|
||||
|
||||
// Load the arguments ONLY into the argument files.
|
||||
// Validation may optionally run on the rest of the arguments.
|
||||
loadArgumentsIntoObject(argumentFiles);
|
||||
|
||||
for (File file: argumentFiles.files) {
|
||||
List<String> fileTokens = getArguments(file);
|
||||
parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs);
|
||||
}
|
||||
|
||||
return parsedArgs;
|
||||
}
|
||||
|
||||
private void parse(ArgumentMatchSource matchSource, List<String> tokens,
|
||||
ArgumentMatches argumentMatches, SortedMap<ArgumentMatchSource, List<String>> parsedArgs) {
|
||||
ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1);
|
||||
|
||||
int i = 0;
|
||||
for (String token: tokens) {
|
||||
// If the token is of argument form, parse it into its own argument match.
|
||||
// Otherwise, pair it with the most recently used argument discovered.
|
||||
ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i);
|
||||
if( isArgumentForm(token) ) {
|
||||
ArgumentMatch argumentMatch = parseArgument( token, i );
|
||||
ArgumentMatch argumentMatch = parseArgument( token, site );
|
||||
if( argumentMatch != null ) {
|
||||
argumentMatches.mergeInto( argumentMatch );
|
||||
lastArgumentMatchSite = i;
|
||||
lastArgumentMatchSite = site;
|
||||
}
|
||||
}
|
||||
else {
|
||||
|
|
@ -170,10 +198,31 @@ public class ParsingEngine {
|
|||
!argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite))
|
||||
argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token );
|
||||
else
|
||||
argumentMatches.MissingArgument.addValue( i, token );
|
||||
argumentMatches.MissingArgument.addValue( site, token );
|
||||
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
||||
parsedArgs.put(matchSource, tokens);
|
||||
}
|
||||
|
||||
private List<String> getArguments(File file) {
|
||||
try {
|
||||
if (file.getAbsolutePath().endsWith(".list")) {
|
||||
return getListArguments(file);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
throw new UserException.CouldNotReadInputFile(file, e);
|
||||
}
|
||||
throw new UserException.CouldNotReadInputFile(file, "file extension is not .list");
|
||||
}
|
||||
|
||||
private List<String> getListArguments(File file) throws IOException {
|
||||
ArrayList<String> argsList = new ArrayList<String>();
|
||||
for (String line: FileUtils.readLines(file))
|
||||
argsList.addAll(Arrays.asList(Utils.escapeExpressions(line)));
|
||||
return argsList;
|
||||
}
|
||||
|
||||
public enum ValidationType { MissingRequiredArgument,
|
||||
|
|
@ -494,7 +543,7 @@ public class ParsingEngine {
|
|||
* @param position The position of the token in question.
|
||||
* @return ArgumentMatch associated with this token, or null if no match exists.
|
||||
*/
|
||||
private ArgumentMatch parseArgument( String token, int position ) {
|
||||
private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) {
|
||||
if( !isArgumentForm(token) )
|
||||
throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
|
||||
|
||||
|
|
@ -579,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException {
|
|||
|
||||
private static String formatArguments( ArgumentMatch invalidValues ) {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for( int index: invalidValues.indices.keySet() )
|
||||
for( String value: invalidValues.indices.get(index) ) {
|
||||
sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) );
|
||||
for( ArgumentMatchSite site: invalidValues.sites.keySet() )
|
||||
for( String value: invalidValues.sites.get(site) ) {
|
||||
switch (site.getSource().getType()) {
|
||||
case CommandLine:
|
||||
sb.append( String.format("%nInvalid argument value '%s' at position %d.",
|
||||
value, site.getIndex()) );
|
||||
break;
|
||||
case File:
|
||||
sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.",
|
||||
value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) );
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException( String.format("Unexpected argument match source type: %s",
|
||||
site.getSource().getType()));
|
||||
}
|
||||
if(value != null && Utils.dupString(' ',value.length()).equals(value))
|
||||
sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace.");
|
||||
}
|
||||
|
|
@ -634,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException {
|
|||
private static String formatArguments(ArgumentDefinition definition, String argumentPassed) {
|
||||
return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Container class to store the list of argument files.
|
||||
* The files will be parsed after the command line arguments.
|
||||
*/
|
||||
class ParsingEngineArgumentFiles {
|
||||
@Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false)
|
||||
public List<File> files = new ArrayList<File>();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ public abstract class ParsingMethod {
|
|||
* @return An argument match. Definition field will be populated if a match was found or
|
||||
* empty if no appropriate definition could be found.
|
||||
*/
|
||||
public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) {
|
||||
public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) {
|
||||
// If the argument is valid, parse out the argument.
|
||||
Matcher matcher = pattern.matcher(token);
|
||||
|
||||
|
|
@ -102,9 +102,7 @@ public abstract class ParsingMethod {
|
|||
|
||||
// Try to find a matching argument. If found, label that as the match. If not found, add the argument
|
||||
// with a null definition.
|
||||
ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags);
|
||||
|
||||
return argumentMatch;
|
||||
return new ArgumentMatch(argument,argumentDefinition,position,tags);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -28,34 +28,30 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
|
|||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.samtools.*;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.*;
|
||||
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
|
||||
import org.broadinstitute.sting.gatk.filters.FilterManager;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
|
||||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.io.stubs.Stub;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
|
||||
import org.broadinstitute.sting.gatk.walkers.*;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.*;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalUtils;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.*;
|
||||
|
|
@ -92,7 +88,7 @@ public class GenomeAnalysisEngine {
|
|||
/**
|
||||
* Accessor for sample metadata
|
||||
*/
|
||||
private SampleDataSource sampleDataSource = null;
|
||||
private SampleDB sampleDB = null;
|
||||
|
||||
/**
|
||||
* Accessor for sharded reference-ordered data.
|
||||
|
|
@ -206,6 +202,9 @@ public class GenomeAnalysisEngine {
|
|||
// Prepare the data for traversal.
|
||||
initializeDataSources();
|
||||
|
||||
// initialize sampleDB
|
||||
initializeSampleDB();
|
||||
|
||||
// initialize and validate the interval list
|
||||
initializeIntervals();
|
||||
validateSuppliedIntervals();
|
||||
|
|
@ -222,12 +221,12 @@ public class GenomeAnalysisEngine {
|
|||
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
|
||||
|
||||
// execute the microscheduler, storing the results
|
||||
Object result = microScheduler.execute(this.walker, shardStrategy);
|
||||
return microScheduler.execute(this.walker, shardStrategy);
|
||||
|
||||
//monitor.stop();
|
||||
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
|
||||
|
||||
return result;
|
||||
//return result;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -259,13 +258,12 @@ public class GenomeAnalysisEngine {
|
|||
* @return A collection of available filters.
|
||||
*/
|
||||
public Collection<ReadFilter> createFilters() {
|
||||
Set<ReadFilter> filters = new HashSet<ReadFilter>();
|
||||
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
|
||||
final List<ReadFilter> filters = WalkerManager.getReadFilters(walker,this.getFilterManager());
|
||||
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
|
||||
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
|
||||
for(String filterName: this.getArguments().readFilters)
|
||||
for(final String filterName: this.getArguments().readFilters)
|
||||
filters.add(this.getFilterManager().createByName(filterName));
|
||||
return Collections.unmodifiableSet(filters);
|
||||
return Collections.unmodifiableList(filters);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -299,10 +297,14 @@ public class GenomeAnalysisEngine {
|
|||
else if(WalkerManager.getDownsamplingMethod(walker) != null)
|
||||
method = WalkerManager.getDownsamplingMethod(walker);
|
||||
else
|
||||
method = argCollection.getDefaultDownsamplingMethod();
|
||||
method = GATKArgumentCollection.getDefaultDownsamplingMethod();
|
||||
return method;
|
||||
}
|
||||
|
||||
protected void setDownsamplingMethod(DownsamplingMethod method) {
|
||||
argCollection.setDownsamplingMethod(method);
|
||||
}
|
||||
|
||||
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
|
||||
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
|
||||
|
||||
|
|
@ -381,18 +383,18 @@ public class GenomeAnalysisEngine {
|
|||
// If intervals is non-null and empty at this point, it means that the list of intervals to process
|
||||
// was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since
|
||||
// this was very likely unintentional, the user should be informed of this. Note that this is different
|
||||
// from the case where intervals == null, which indicates either that there were no interval arguments,
|
||||
// or that -L all was specified.
|
||||
// from the case where intervals == null, which indicates that there were no interval arguments.
|
||||
if ( intervals != null && intervals.isEmpty() ) {
|
||||
throw new ArgumentException("The given combination of -L and -XL options results in an empty set. " +
|
||||
"No intervals to process.");
|
||||
logger.warn("The given combination of -L and -XL options results in an empty set. No intervals to process.");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sharding strategy given a driving data source.
|
||||
*
|
||||
* @param readsDataSource readsDataSource
|
||||
* @param drivingDataSource Data on which to shard.
|
||||
* @param intervals intervals
|
||||
* @return the sharding strategy
|
||||
*/
|
||||
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
|
||||
|
|
@ -429,7 +431,7 @@ public class GenomeAnalysisEngine {
|
|||
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
|
||||
}
|
||||
|
||||
ShardStrategy shardStrategy = null;
|
||||
ShardStrategy shardStrategy;
|
||||
ShardStrategyFactory.SHATTER_STRATEGY shardType;
|
||||
|
||||
long SHARD_SIZE = 100000L;
|
||||
|
|
@ -438,6 +440,8 @@ public class GenomeAnalysisEngine {
|
|||
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
|
||||
|
||||
if (intervals != null && !intervals.isEmpty()) {
|
||||
if (readsDataSource == null)
|
||||
throw new IllegalArgumentException("readsDataSource is null");
|
||||
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
|
||||
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
|
||||
|
||||
|
|
@ -501,7 +505,8 @@ public class GenomeAnalysisEngine {
|
|||
*/
|
||||
private void initializeTempDirectory() {
|
||||
File tempDir = new File(System.getProperty("java.io.tmpdir"));
|
||||
tempDir.mkdirs();
|
||||
if (!tempDir.exists() && !tempDir.mkdirs())
|
||||
throw new UserException.BadTmpDir("Unable to create directory");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -566,34 +571,23 @@ public class GenomeAnalysisEngine {
|
|||
protected void initializeIntervals() {
|
||||
|
||||
// return if no interval arguments at all
|
||||
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null))
|
||||
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
|
||||
return;
|
||||
|
||||
// if '-L all' was specified, verify that it was the only -L specified and return if so.
|
||||
if(argCollection.intervals != null) {
|
||||
for(String interval: argCollection.intervals) {
|
||||
if(interval.trim().equals("all")) {
|
||||
if(argCollection.intervals.size() > 1)
|
||||
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
|
||||
|
||||
// '-L all' was specified and seems valid. Return.
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Note that the use of '-L all' is no longer supported.
|
||||
|
||||
// if include argument isn't given, create new set of all possible intervals
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ?
|
||||
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
|
||||
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
|
||||
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging)));
|
||||
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
|
||||
|
||||
// if no exclude arguments, can return parseIntervalArguments directly
|
||||
if (argCollection.excludeIntervals == null)
|
||||
if ( argCollection.excludeIntervals == null )
|
||||
intervals = includeSortedSet;
|
||||
|
||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
|
||||
else {
|
||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null);
|
||||
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
|
||||
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
|
||||
|
||||
// logging messages only printed when exclude (-XL) arguments are given
|
||||
|
|
@ -608,47 +602,26 @@ public class GenomeAnalysisEngine {
|
|||
|
||||
/**
|
||||
* Loads the intervals relevant to the current execution
|
||||
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools
|
||||
* notation, or a combination of the above
|
||||
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
|
||||
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
|
||||
* @param rule interval merging rule
|
||||
* @return A sorted, merged list of all intervals specified in this arg list.
|
||||
*/
|
||||
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) {
|
||||
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
|
||||
|
||||
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
|
||||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
|
||||
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
|
||||
for ( IntervalBinding intervalBinding : argList ) {
|
||||
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
|
||||
|
||||
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
|
||||
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
|
||||
if ( intervals.isEmpty() ) {
|
||||
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
|
||||
}
|
||||
|
||||
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
|
||||
}
|
||||
|
||||
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
|
||||
}
|
||||
|
||||
/**
|
||||
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
|
||||
* @return ROD intervals as GenomeLocs
|
||||
*/
|
||||
private List<GenomeLoc> getRODIntervals() {
|
||||
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
|
||||
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
|
||||
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
|
||||
if (rodNames != null && argCollection.RODToInterval != null) {
|
||||
String rodName = argCollection.RODToInterval;
|
||||
|
||||
// check to make sure we have a rod of that name
|
||||
if (!rodNames.containsKey(rodName))
|
||||
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
|
||||
|
||||
for (String str : rodNames.keySet())
|
||||
if (str.equals(rodName)) {
|
||||
logger.info("Adding interval list from track (ROD) named " + rodName);
|
||||
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
|
||||
ret.addAll(intervalGenerator.toGenomeLocList());
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add additional, externally managed IO streams for inputs.
|
||||
*
|
||||
|
|
@ -692,12 +665,22 @@ public class GenomeAnalysisEngine {
|
|||
for (ReadFilter filter : filters)
|
||||
filter.initialize(this);
|
||||
|
||||
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
|
||||
|
||||
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
|
||||
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
|
||||
}
|
||||
|
||||
/**
|
||||
* Entry-point function to initialize the samples database from input data and pedigree arguments
|
||||
*/
|
||||
private void initializeSampleDB() {
|
||||
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
|
||||
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
|
||||
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
|
||||
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
|
||||
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
|
||||
sampleDB = sampleDBBuilder.getFinalSampleDB();
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets a unique identifier for the reader sourcing this read.
|
||||
* @param read Read to examine.
|
||||
|
|
@ -716,106 +699,13 @@ public class GenomeAnalysisEngine {
|
|||
return getReadsDataSource().getSAMFile(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
|
||||
* found in the corresponding bam file.
|
||||
*
|
||||
* @return Sets of samples in the merged input SAM stream, grouped by readers
|
||||
*/
|
||||
public List<Set<String>> getSamplesByReaders() {
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> samples = new HashSet<String>(1);
|
||||
sample_sets.add(samples);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
samples.add(g.getSample());
|
||||
}
|
||||
}
|
||||
|
||||
return sample_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
|
||||
* found in the corresponding bam file.
|
||||
*
|
||||
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
|
||||
*/
|
||||
public List<Set<String>> getLibrariesByReaders() {
|
||||
|
||||
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> libs = new HashSet<String>(2);
|
||||
lib_sets.add(libs);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
libs.add(g.getLibrary());
|
||||
}
|
||||
}
|
||||
|
||||
return lib_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
|
||||
*
|
||||
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
|
||||
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
|
||||
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
|
||||
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
|
||||
*
|
||||
* @return sets of (merged) read group ids in order of input bams
|
||||
*/
|
||||
public List<Set<String>> getMergedReadGroupsByReaders() {
|
||||
|
||||
|
||||
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
|
||||
|
||||
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
|
||||
|
||||
for (SAMReaderID r : readers) {
|
||||
|
||||
Set<String> groups = new HashSet<String>(5);
|
||||
rg_sets.add(groups);
|
||||
|
||||
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
|
||||
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
|
||||
// use HeaderMerger to translate original read group id from the reader into the read group id in the
|
||||
// merged stream, and save that remapped read group id to associate it with specific reader
|
||||
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
|
||||
} else {
|
||||
// otherwise, pass through the unmapped read groups since this is what Picard does as well
|
||||
groups.add(g.getReadGroupId());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return rg_sets;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
|
||||
*
|
||||
* @param reads Reads data source.
|
||||
* @param reference Reference data source.
|
||||
* @param rods a collection of the reference ordered data tracks
|
||||
* @param manager manager
|
||||
*/
|
||||
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
|
||||
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
|
||||
|
|
@ -844,15 +734,22 @@ public class GenomeAnalysisEngine {
|
|||
/**
|
||||
* Gets a data source for the given set of reads.
|
||||
*
|
||||
* @param argCollection arguments
|
||||
* @param genomeLocParser parser
|
||||
* @param refReader reader
|
||||
* @return A data source for the given set of reads.
|
||||
*/
|
||||
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
|
||||
DownsamplingMethod method = getDownsamplingMethod();
|
||||
|
||||
// Synchronize the method back into the collection so that it shows up when
|
||||
// interrogating for the downsample method during command line recreation.
|
||||
setDownsamplingMethod(method);
|
||||
|
||||
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
|
||||
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
|
||||
|
||||
SAMDataSource dataSource = new SAMDataSource(
|
||||
return new SAMDataSource(
|
||||
samReaderIDs,
|
||||
genomeLocParser,
|
||||
argCollection.useOriginalBaseQualities,
|
||||
|
|
@ -868,14 +765,12 @@ public class GenomeAnalysisEngine {
|
|||
refReader,
|
||||
argCollection.defaultBaseQualities,
|
||||
!argCollection.disableLowMemorySharding);
|
||||
return dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* Opens a reference sequence file paired with an index. Only public for testing purposes
|
||||
*
|
||||
* @param refFile Handle to a reference sequence file. Non-null.
|
||||
* @return A thread-safe file wrapper.
|
||||
*/
|
||||
public void setReferenceDataSource(File refFile) {
|
||||
this.referenceDataSource = new ReferenceDataSource(refFile);
|
||||
|
|
@ -929,6 +824,26 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource.getHeader(reader);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an ordered list of the unmerged SAM file headers known to this engine.
|
||||
* @return list of header for each input SAM file, in command line order
|
||||
*/
|
||||
public List<SAMFileHeader> getSAMFileHeaders() {
|
||||
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
|
||||
for ( final SAMReaderID id : getReadsDataSource().getReaderIDs() ) {
|
||||
headers.add(getReadsDataSource().getHeader(id));
|
||||
}
|
||||
return headers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the master sequence dictionary for this GATK engine instance
|
||||
* @return a never-null dictionary listing all of the contigs known to this engine instance
|
||||
*/
|
||||
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||
return getReferenceDataSource().getReference().getSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns data source object encapsulating all essential info and handlers used to traverse
|
||||
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
|
||||
|
|
@ -939,8 +854,6 @@ public class GenomeAnalysisEngine {
|
|||
return this.readsDataSource;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Sets the collection of GATK main application arguments.
|
||||
*
|
||||
|
|
@ -1027,140 +940,14 @@ public class GenomeAnalysisEngine {
|
|||
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
|
||||
}
|
||||
|
||||
public SampleDataSource getSampleMetadata() {
|
||||
return this.sampleDataSource;
|
||||
}
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// code for working with Samples database
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id sample id
|
||||
* @return sample Object with this ID
|
||||
*/
|
||||
public Sample getSampleById(String id) {
|
||||
return sampleDataSource.getSampleById(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample for a given read group
|
||||
* Must first look up ID for read group
|
||||
* @param readGroup of sample
|
||||
* @return sample object with ID from the read group
|
||||
*/
|
||||
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
|
||||
return sampleDataSource.getSampleByReadGroup(readGroup);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample for a given read
|
||||
* Must first look up read group, and then sample ID for that read group
|
||||
* @param read of sample
|
||||
* @return sample object of this read
|
||||
*/
|
||||
public Sample getSampleByRead(SAMRecord read) {
|
||||
return getSampleByReadGroup(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
*/
|
||||
public int sampleCount() {
|
||||
return sampleDataSource.sampleCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
|
||||
* @param familyId family ID
|
||||
* @return Samples with the given family ID
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
return sampleDataSource.getFamily(familyId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample parent sample
|
||||
* @return children of the given sample
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
return sampleDataSource.getChildren(sample);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets all the samples
|
||||
* @return
|
||||
*/
|
||||
public Collection<Sample> getSamples() {
|
||||
return sampleDataSource.getSamples();
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a list of sample names and returns their corresponding sample objects
|
||||
*
|
||||
* @param sampleNameList List of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
return sampleDataSource.getSamples(sampleNameList);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have any value (which could be null) for a given property
|
||||
* @param key Property key
|
||||
* @return Set of samples with the property
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key) {
|
||||
return sampleDataSource.getSamplesWithProperty(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have a property with a certain value
|
||||
* Value must be a string for now - could add a similar method for matching any objects in the future
|
||||
*
|
||||
* @param key Property key
|
||||
* @param value String property value
|
||||
* @return Set of samples that match key and value
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key, String value) {
|
||||
return sampleDataSource.getSamplesWithProperty(key, value);
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of sample objects for the sample names in a variant context
|
||||
*
|
||||
* @param context Any variant context
|
||||
* @return a set of the sample objects
|
||||
*/
|
||||
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
|
||||
Set<Sample> samples = new HashSet<Sample>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
samples.add(sampleDataSource.getOrCreateSample(sampleName));
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
return sampleDataSource.getSAMFileSamples();
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a subcontext restricted to samples with a given property key/value
|
||||
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
|
||||
* @param context VariantContext to filter
|
||||
* @param key property key
|
||||
* @param value property value (must be string)
|
||||
* @return subcontext
|
||||
*/
|
||||
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
|
||||
return sampleDataSource.subContextFromSampleProperty(context, key, value);
|
||||
public SampleDB getSampleDB() {
|
||||
return this.sampleDB;
|
||||
}
|
||||
|
||||
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
|||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Holds a bunch of basic information about the traversal.
|
||||
|
|
@ -102,8 +103,12 @@ public class ReadMetrics implements Cloneable {
|
|||
counter.put(filter.getClass(), c + 1L);
|
||||
}
|
||||
|
||||
public Map<Class,Long> getCountsByFilter() {
|
||||
return Collections.unmodifiableMap(counter);
|
||||
public Map<String,Long> getCountsByFilter() {
|
||||
final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
|
||||
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
|
||||
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
|
||||
}
|
||||
return sortedCounts;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -29,13 +29,11 @@ package org.broadinstitute.sting.gatk.arguments;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.*;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class DbsnpArgumentCollection {
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,34 +26,26 @@
|
|||
package org.broadinstitute.sting.gatk.arguments;
|
||||
|
||||
import net.sf.samtools.SAMFileReader;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.Hidden;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.IntervalBinding;
|
||||
import org.broadinstitute.sting.gatk.DownsampleType;
|
||||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
|
||||
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
|
||||
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
|
||||
import org.simpleframework.xml.*;
|
||||
import org.simpleframework.xml.core.Persister;
|
||||
import org.simpleframework.xml.stream.Format;
|
||||
import org.simpleframework.xml.stream.HyphenStyle;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.InputStream;
|
||||
import java.io.PrintStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* @author aaron
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class GATKArgumentCollection {
|
||||
|
||||
/* our version number */
|
||||
|
|
@ -64,58 +56,58 @@ public class GATKArgumentCollection {
|
|||
public GATKArgumentCollection() {
|
||||
}
|
||||
|
||||
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required = false)
|
||||
public Map<String, String> walkerArgs = new HashMap<String, String>();
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
|
||||
public List<String> samFiles = new ArrayList<String>();
|
||||
|
||||
// parameters and their defaults
|
||||
@ElementList(required = false)
|
||||
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
|
||||
public List<File> sampleFiles = new ArrayList<File>();
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
|
||||
public Integer readBufferSize = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
|
||||
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
|
||||
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
|
||||
public List<String> readFilters = new ArrayList<String>();
|
||||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> intervals = null;
|
||||
/**
|
||||
* Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
|
||||
* One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
|
||||
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
|
||||
*/
|
||||
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
||||
public List<IntervalBinding<Feature>> intervals = null;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
|
||||
public List<String> excludeIntervals = null;
|
||||
/**
|
||||
* Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
|
||||
* One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
|
||||
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
|
||||
*/
|
||||
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
|
||||
public List<IntervalBinding<Feature>> excludeIntervals = null;
|
||||
|
||||
/**
|
||||
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
|
||||
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
|
||||
*/
|
||||
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
|
||||
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
|
||||
|
||||
/**
|
||||
* Should abutting (but not overlapping) intervals be treated as separate intervals?
|
||||
*/
|
||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
|
||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||
|
||||
@Element(required = false)
|
||||
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
|
||||
public File referenceFile = null;
|
||||
|
||||
@Deprecated
|
||||
@Hidden
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
|
||||
public ArrayList<String> RODBindings = new ArrayList<String>();
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
|
||||
public String RODToInterval = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
|
||||
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
|
||||
public boolean nonDeterministicRandomSeed = false;
|
||||
|
||||
|
|
@ -128,22 +120,19 @@ public class GATKArgumentCollection {
|
|||
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
|
||||
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
|
||||
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here", required = false)
|
||||
public DownsampleType downsamplingType = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
|
||||
public Double downsampleFraction = null;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
|
||||
public Integer downsampleCoverage = null;
|
||||
|
||||
/**
|
||||
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
|
||||
* a default downsampling mechanism, return null.
|
||||
* @return The explicitly specified downsampling mechanism, or null if none exists.
|
||||
* a default downsampling mechanism, return the default.
|
||||
* @return The explicitly specified downsampling mechanism, or the default if none exists.
|
||||
*/
|
||||
public DownsamplingMethod getDownsamplingMethod() {
|
||||
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
|
||||
|
|
@ -153,16 +142,26 @@ public class GATKArgumentCollection {
|
|||
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments.
|
||||
* @param method The downsampling mechanism.
|
||||
*/
|
||||
public void setDownsamplingMethod(DownsamplingMethod method) {
|
||||
if (method == null)
|
||||
throw new IllegalArgumentException("method is null");
|
||||
downsamplingType = method.type;
|
||||
downsampleCoverage = method.toCoverage;
|
||||
downsampleFraction = method.toFraction;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// BAQ arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
|
||||
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
|
||||
public double BAQGOP = BAQ.DEFAULT_GOP;
|
||||
|
||||
|
|
@ -171,7 +170,6 @@ public class GATKArgumentCollection {
|
|||
// performance log arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
|
||||
public File performanceLog = null;
|
||||
|
||||
|
|
@ -184,67 +182,117 @@ public class GATKArgumentCollection {
|
|||
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
|
||||
}
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
|
||||
public Boolean useOriginalBaseQualities = false;
|
||||
|
||||
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
|
||||
public byte defaultBaseQualities = -1;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
|
||||
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
|
||||
public ValidationExclusion.TYPE unsafe;
|
||||
|
||||
/** How many threads should be allocated to this analysis. */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
|
||||
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis", required = false)
|
||||
public int numberOfThreads = 1;
|
||||
|
||||
/** What rule should we use when merging intervals */
|
||||
@Element(required = false)
|
||||
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
|
||||
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
|
||||
|
||||
@ElementList(required = false)
|
||||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
|
||||
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line", required = false)
|
||||
public List<String> readGroupBlackList = null;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// distributed GATK arguments
|
||||
// PED (pedigree) support
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
|
||||
@Hidden
|
||||
public File processingTrackerFile = null;
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false)
|
||||
@Hidden
|
||||
public boolean restartProcessingTracker = false;
|
||||
/**
|
||||
* <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
|
||||
* processed in the GATK.</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
|
||||
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>Family ID</li>
|
||||
* <li>Individual ID</li>
|
||||
* <li>Paternal ID</li>
|
||||
* <li>Maternal ID</li>
|
||||
* <li>Sex (1=male; 2=female; other=unknown)</li>
|
||||
* <li>Phenotype</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
|
||||
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
|
||||
* quantitative trait or an affection status column: GATK will automatically detect which type
|
||||
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
|
||||
*
|
||||
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
|
||||
*
|
||||
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
|
||||
* line will be ignored. Do not start any family IDs with this character therefore.</p>
|
||||
*
|
||||
* <p>Affection status should be coded:</p>
|
||||
*
|
||||
* <ul>
|
||||
* <li>-9 missing</li>
|
||||
* <li>0 missing</li>
|
||||
* <li>1 unaffected</li>
|
||||
* <li>2 affected</li>
|
||||
* </ul>
|
||||
*
|
||||
* <p>If any value outside of -9,0,1,2 is detected than the samples are assumed
|
||||
* to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
|
||||
* represents the missing value.</p>
|
||||
*
|
||||
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
|
||||
*
|
||||
* <p>For example, here are two individuals (one row = one person):</p>
|
||||
*
|
||||
* <pre>
|
||||
* FAM001 1 0 0 1 2
|
||||
* FAM001 2 0 0 1 2
|
||||
* </pre>
|
||||
*
|
||||
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
|
||||
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
|
||||
*
|
||||
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
|
||||
* data should clearly indicate so in their arguments and will throw errors if required pedigree
|
||||
* information is missing.</p>
|
||||
*/
|
||||
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
|
||||
public List<File> pedigreeFiles = Collections.emptyList();
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
|
||||
@Hidden
|
||||
public File processingTrackerStatusFile = null;
|
||||
/**
|
||||
* Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
|
||||
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
|
||||
* as -ped supports
|
||||
*/
|
||||
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
|
||||
public List<String> pedigreeStrings = Collections.emptyList();
|
||||
|
||||
@Element(required=false)
|
||||
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false)
|
||||
@Hidden
|
||||
public int processTrackerID = -1;
|
||||
/**
|
||||
* How strict should we be in parsing the PED files?
|
||||
*/
|
||||
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
|
||||
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// BAM indexing and sharding arguments
|
||||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
|
||||
@Hidden
|
||||
public boolean allowIntervalsWithUnindexedBAM = false;
|
||||
|
||||
@Element(required = false)
|
||||
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality.",required=false)
|
||||
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality",required=false)
|
||||
public boolean disableLowMemorySharding = false;
|
||||
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
|
@ -253,69 +301,6 @@ public class GATKArgumentCollection {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* marshal the data out to a object
|
||||
*
|
||||
* @param collection the GATKArgumentCollection to load into
|
||||
* @param outputFile the file to write to
|
||||
*/
|
||||
public static void marshal(GATKArgumentCollection collection, String outputFile) {
|
||||
Serializer serializer = new Persister(new Format(new HyphenStyle()));
|
||||
File result = new File(outputFile);
|
||||
try {
|
||||
serializer.write(collection, result);
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* marshal the data out to a object
|
||||
*
|
||||
* @param collection the GATKArgumentCollection to load into
|
||||
* @param outputFile the stream to write to
|
||||
*/
|
||||
public static void marshal(GATKArgumentCollection collection, PrintStream outputFile) {
|
||||
Serializer serializer = new Persister(new Format(new HyphenStyle()));
|
||||
try {
|
||||
serializer.write(collection, outputFile);
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unmashall the object from a configuration file
|
||||
*
|
||||
* @param filename the filename to marshal from
|
||||
*/
|
||||
public static GATKArgumentCollection unmarshal(String filename) {
|
||||
Serializer serializer = new Persister(new Format(new HyphenStyle()));
|
||||
File source = new File(filename);
|
||||
try {
|
||||
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
|
||||
return example;
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Failed to marshal the data from file " + filename, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* unmashall the object from a configuration file
|
||||
*
|
||||
* @param file the inputstream to marshal from
|
||||
*/
|
||||
public static GATKArgumentCollection unmarshal(InputStream file) {
|
||||
Serializer serializer = new Persister(new Format(new HyphenStyle()));
|
||||
try {
|
||||
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, file);
|
||||
return example;
|
||||
} catch (Exception e) {
|
||||
throw new ReviewedStingException("Failed to marshal the data from file " + file.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* test equality between two arg collections. This function defines the statement:
|
||||
* "not fun to write"
|
||||
|
|
@ -363,7 +348,7 @@ public class GATKArgumentCollection {
|
|||
if (!other.referenceFile.equals(this.referenceFile)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.intervals.equals(this.intervals)) {
|
||||
if ((other.intervals == null && this.intervals != null) || !other.intervals.equals(this.intervals)) {
|
||||
return false;
|
||||
}
|
||||
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
|
||||
|
|
@ -386,39 +371,21 @@ public class GATKArgumentCollection {
|
|||
if (other.intervalMerging != this.intervalMerging) {
|
||||
return false;
|
||||
}
|
||||
if ((other.RODToInterval == null && RODToInterval != null) ||
|
||||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (other.phoneHomeType != this.phoneHomeType) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (BTIMergeRule != other.BTIMergeRule)
|
||||
if (intervalSetRule != other.intervalSetRule)
|
||||
return false;
|
||||
|
||||
if ( BAQMode != other.BAQMode) return false;
|
||||
if ( BAQMode != other.BAQMode ) return false;
|
||||
if ( BAQGOP != other.BAQGOP ) return false;
|
||||
|
||||
if ((other.performanceLog == null && this.performanceLog != null) ||
|
||||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
|
||||
return false;
|
||||
|
||||
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
|
||||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
|
||||
return false;
|
||||
|
||||
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
|
||||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
|
||||
return false;
|
||||
|
||||
if ( restartProcessingTracker != other.restartProcessingTracker )
|
||||
return false;
|
||||
|
||||
if ( processTrackerID != other.processTrackerID )
|
||||
return false;
|
||||
|
||||
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
|
||||
return false;
|
||||
|
||||
|
|
|
|||
|
|
@ -28,13 +28,11 @@ package org.broadinstitute.sting.gatk.arguments;
|
|||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.simpleframework.xml.Root;
|
||||
|
||||
/**
|
||||
* @author ebanks
|
||||
* @version 1.0
|
||||
*/
|
||||
@Root
|
||||
public class StandardVariantContextInputArgumentCollection {
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@ public class ValidationExclusion {
|
|||
|
||||
public enum TYPE {
|
||||
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard
|
||||
ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list
|
||||
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
|
||||
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
|
||||
ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.HasGenomeLocation;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
*/
|
||||
@Deprecated
|
||||
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
|
||||
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
|
||||
public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
|
||||
|
||||
/**
|
||||
* Are there any reads associated with this locus?
|
||||
|
|
@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
* @return
|
||||
*/
|
||||
public boolean hasReads() {
|
||||
return basePileup != null && basePileup.size() > 0 ;
|
||||
return basePileup != null && basePileup.getNumberOfElements() > 0 ;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
|
|||
* @return
|
||||
*/
|
||||
public int size() {
|
||||
return basePileup.size();
|
||||
return basePileup.getNumberOfElements();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -26,7 +26,6 @@
|
|||
package org.broadinstitute.sting.gatk.contexts;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
|
|||
return splitContextBySampleName(context, null);
|
||||
}
|
||||
|
||||
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
|
||||
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
|
||||
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
|
||||
m.put(new Sample(entry.getKey()), entry.getValue());
|
||||
}
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
|
||||
* of sample object.
|
||||
|
|
@ -97,11 +88,11 @@ public class AlignmentContextUtils {
|
|||
GenomeLoc loc = context.getLocation();
|
||||
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
|
||||
|
||||
for(String sample: context.getPileup().getSampleNames()) {
|
||||
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample);
|
||||
for(String sample: context.getPileup().getSamples()) {
|
||||
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
|
||||
|
||||
// Don't add empty pileups to the split context.
|
||||
if(pileupBySample.size() == 0)
|
||||
if(pileupBySample.getNumberOfElements() == 0)
|
||||
continue;
|
||||
|
||||
if(sample != null)
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.providers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
|
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
|
|||
* @param site Site at which to create the blank locus context.
|
||||
* @return empty context.
|
||||
*/
|
||||
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
|
||||
private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
|
||||
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
|
||||
private AlignmentContext createEmptyLocus( GenomeLoc site ) {
|
||||
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
*/
|
||||
public FilePointer next() {
|
||||
FilePointer current = wrappedIterator.next();
|
||||
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
|
||||
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
|
||||
current = current.combine(parser,wrappedIterator.next());
|
||||
return current;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
|
|||
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
|
|
@ -57,6 +58,8 @@ import java.util.*;
|
|||
* Converts shards to SAM iterators over the specified region
|
||||
*/
|
||||
public class SAMDataSource {
|
||||
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
|
||||
|
||||
/** Backing support for reads. */
|
||||
protected final ReadProperties readProperties;
|
||||
|
||||
|
|
@ -235,6 +238,12 @@ public class SAMDataSource {
|
|||
for(SAMFileReader reader: readers.values()) {
|
||||
// Get the sort order, forcing it to coordinate if unsorted.
|
||||
SAMFileHeader header = reader.getFileHeader();
|
||||
|
||||
if ( header.getReadGroups().isEmpty() ) {
|
||||
throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile,
|
||||
"SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups");
|
||||
}
|
||||
|
||||
SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate;
|
||||
|
||||
// Validate that all input files are sorted in the same order.
|
||||
|
|
@ -638,7 +647,9 @@ public class SAMDataSource {
|
|||
BAQ.QualityMode qmode,
|
||||
IndexedFastaSequenceFile refReader,
|
||||
byte defaultBaseQualities) {
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
|
||||
// only wrap if we are replacing the original qualitiies or using a default base quality
|
||||
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
|
||||
|
||||
// NOTE: this (and other filtering) should be done before on-the-fly sorting
|
||||
// as there is no reason to sort something that we will end of throwing away
|
||||
|
|
@ -750,6 +761,7 @@ public class SAMDataSource {
|
|||
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
|
||||
for(SAMReaderID readerID: readerIDs) {
|
||||
SAMFileReader reader = new SAMFileReader(readerID.samFile);
|
||||
reader.setSAMRecordFactory(factory);
|
||||
reader.enableFileSource(true);
|
||||
reader.enableIndexMemoryMapping(false);
|
||||
if(!enableLowMemorySharding)
|
||||
|
|
|
|||
|
|
@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
|
|||
// intervals
|
||||
GenomeLocSortedSet intervalSortedSet = null;
|
||||
if(intervals != null)
|
||||
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL);
|
||||
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
|
||||
else {
|
||||
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
|
||||
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())
|
||||
|
|
|
|||
|
|
@ -1,30 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Aug 12, 2010
|
||||
* Time: 2:09:16 PM
|
||||
*/
|
||||
public class PropertyDefinition {
|
||||
|
||||
String property;
|
||||
|
||||
String[] values;
|
||||
|
||||
public String getProperty() {
|
||||
return property;
|
||||
}
|
||||
|
||||
public void setProperty(String property) {
|
||||
this.property = property;
|
||||
}
|
||||
|
||||
public String[] getValues() {
|
||||
return values;
|
||||
}
|
||||
|
||||
public void setValues(String[] values) {
|
||||
this.values = values;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,203 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Jul 26, 2010
|
||||
* Time: 3:31:38 PM
|
||||
*/
|
||||
public class Sample implements java.io.Serializable {
|
||||
|
||||
private final String id;
|
||||
|
||||
private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file
|
||||
|
||||
private boolean hasSAMFileEntry = false; // true if this sample has an entry in the SAM file
|
||||
|
||||
private HashMap<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
private HashMap<String, Sample> relationships = new HashMap<String, Sample>();
|
||||
|
||||
public enum Gender {
|
||||
MALE,
|
||||
FEMALE,
|
||||
UNKNOWN
|
||||
}
|
||||
|
||||
public Sample(String id) {
|
||||
/* if (id == null) {
|
||||
throw new StingException("Error creating sample: sample ID cannot be null");
|
||||
}*/
|
||||
this.id = id;
|
||||
}
|
||||
|
||||
public String getId() {
|
||||
return this.id;
|
||||
}
|
||||
|
||||
public Map<String, Object> getProperties() {
|
||||
return properties;
|
||||
}
|
||||
|
||||
public void setProperties(Map<String, Object> properties) {
|
||||
this.properties = (HashMap) properties;
|
||||
}
|
||||
|
||||
public Map<String,Sample> getRelationships() {
|
||||
return Collections.unmodifiableMap(this.relationships);
|
||||
}
|
||||
|
||||
public void setSampleFileEntry(boolean value) {
|
||||
this.hasSampleFileEntry = value;
|
||||
}
|
||||
|
||||
public boolean hasSAMFileEntry() {
|
||||
return this.hasSAMFileEntry;
|
||||
}
|
||||
|
||||
public void setSAMFileEntry(boolean value) {
|
||||
this.hasSAMFileEntry = value;
|
||||
}
|
||||
|
||||
public boolean hasSampleFileEntry() {
|
||||
return this.hasSampleFileEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get one property
|
||||
* @param key key of property
|
||||
* @return value of property as generic object
|
||||
*/
|
||||
public Object getProperty(String key) {
|
||||
return properties.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set a property
|
||||
* If property already exists, it is overwritten
|
||||
* @param key key of property
|
||||
* @param value object to be stored in properties array
|
||||
*/
|
||||
public void setProperty(String key, Object value) {
|
||||
|
||||
if (relationships.containsKey(key)) {
|
||||
throw new StingException("The same key cannot exist as a property and a relationship");
|
||||
}
|
||||
|
||||
if (key.equals("gender") && value.getClass() != Gender.class) {
|
||||
throw new StingException("'gender' property must be of type Sample.Gender");
|
||||
}
|
||||
|
||||
if (key.equals("population") && value.getClass() != String.class) {
|
||||
throw new StingException("'population' property must be of type String");
|
||||
}
|
||||
|
||||
properties.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get one relationship
|
||||
* @param key of relationship
|
||||
* @return Sample object that this relationship points to
|
||||
*/
|
||||
public Sample getRelationship(String key) {
|
||||
return relationships.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set one relationship
|
||||
* If already set, it is overwritten
|
||||
* @param key key of the relationship
|
||||
* @param value Sample object this relationship points to
|
||||
*/
|
||||
public void setRelationship(String key, Sample value) {
|
||||
if (properties.containsKey(key)) {
|
||||
throw new StingException("The same key cannot exist as a property and a relationship");
|
||||
}
|
||||
relationships.put(key, value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's mother
|
||||
* @return sample object with relationship mother, if exists, or null
|
||||
*/
|
||||
public Sample getMother() {
|
||||
return getRelationship("mother");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's father
|
||||
* @return sample object with relationship father, if exists, or null
|
||||
*/
|
||||
public Sample getFather() {
|
||||
return getRelationship("father");
|
||||
}
|
||||
|
||||
/**
|
||||
* Get gender of the sample
|
||||
* @return property of key "gender" - must be of type Gender
|
||||
*/
|
||||
public Gender getGender() {
|
||||
return (Gender) properties.get("gender");
|
||||
}
|
||||
|
||||
public String getPopulation() {
|
||||
return (String) properties.get("population");
|
||||
}
|
||||
|
||||
public String getFamilyId() {
|
||||
return (String) properties.get("familyId");
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is male, false if female, unknown, or null
|
||||
*/
|
||||
public boolean isMale() {
|
||||
return properties.get("gender") == Gender.MALE;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return True if sample is female, false if male, unknown or null
|
||||
*/
|
||||
public boolean isFemale() {
|
||||
return properties.get("gender") == Gender.MALE;
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param key property key
|
||||
* @return true if sample has this property (even if its value is null)
|
||||
*/
|
||||
public boolean hasProperty(String key) {
|
||||
return properties.containsKey(key);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
|
||||
Sample sample = (Sample) o;
|
||||
|
||||
if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
|
||||
if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
|
||||
if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
|
||||
if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
|
||||
if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return id != null ? id.hashCode() : "".hashCode();
|
||||
}
|
||||
}
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
 * Represents one aliasing rule for sample IDs: a primary ("main") ID plus the
 * alternate IDs that should resolve to it. Populated reflectively from the
 * "sampleAliases" section of a YAML sample file via the bean-style accessors below.
 */
public class SampleAlias {

    String mainId;

    String[] otherIds;

    public String getMainId() {
        return mainId;
    }

    public String[] getOtherIds() {
        return otherIds;
    }

    public void setMainId(String mainId) {
        this.mainId = mainId;
    }

    public void setOtherIds(String[] otherIds) {
        this.otherIds = otherIds;
    }

}
|
||||
|
|
@ -1,590 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.yaml.snakeyaml.TypeDescription;
|
||||
import org.yaml.snakeyaml.Yaml;
|
||||
import org.yaml.snakeyaml.constructor.Constructor;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Jul 26, 2010
|
||||
* Time: 3:30:09 PM
|
||||
*
|
||||
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
|
||||
* in the GATK by the "--samples" argument. This class reads and parses those files.
|
||||
*
|
||||
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
|
||||
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
|
||||
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
|
||||
*
|
||||
*/
|
||||
public class SampleDataSource {
|
||||
|
||||
/**
 * SAMFileHeader that has been created for this analysis.
 */
private SAMFileHeader header;

/**
 * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
 * this is stored as a HashMap.
 */
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

/**
 * Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
 * datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
 *
 * This maps ID => primary ID for all sample ID strings - both primary IDs and aliases.
 */
private HashMap<String, String> sampleAliases = new HashMap<String, String>();

/**
 * While loading sample files, we must be aware of "special" properties and relationships that are always
 * allowed, regardless of any allowed-list restrictions declared in the file itself.
 */
public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};
public static final String[] specialRelationships = new String[] {"mother", "father"};
|
||||
|
||||
/**
 * Constructor takes both a SAM header and sample files because the two must be integrated.
 * @param header SAMFileHeader that has been created for this analysis
 * @param sampleFiles Sample files that were included on the command line
 */
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
    this();
    this.header = header;

    // create an empty sample object for each sample referenced in the SAM header;
    // this is exactly what addSamplesFromSAMHeader() does, so call it instead of
    // duplicating the same loop here
    addSamplesFromSAMHeader(header);

    // add files consecutively
    if (sampleFiles != null) {
        for (File file : sampleFiles) {
            addFile(file);
        }
    }
}
|
||||
|
||||
public SampleDataSource() {
    // NOTE(review): a placeholder Sample keyed by null is always present; presumably so that
    // lookups for reads/records with no sample name resolve to a non-null "null sample" - confirm with callers
    samples.put(null, new Sample(null));
}
|
||||
|
||||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public void addSamplesFromSAMHeader(SAMFileHeader header) {
|
||||
for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
|
||||
if (!hasSample(sampleName)) {
|
||||
Sample newSample = new Sample(sampleName);
|
||||
newSample.setSAMFileEntry(true);
|
||||
samples.put(sampleName, newSample);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
public void addFile(File sampleFile) {
|
||||
|
||||
BufferedReader reader;
|
||||
try {
|
||||
reader = new BufferedReader(new FileReader(sampleFile));
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
|
||||
}
|
||||
|
||||
// set up YAML reader - a "Constructor" creates java object from YAML and "Loader" loads the file
|
||||
Constructor con = new Constructor(SampleFileParser.class);
|
||||
TypeDescription desc = new TypeDescription(SampleFileParser.class);
|
||||
desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
|
||||
desc.putListPropertyType("sampleAliases", SampleAlias.class);
|
||||
con.addTypeDescription(desc);
|
||||
Yaml yaml = new Yaml(con);
|
||||
|
||||
// SampleFileParser stores an object representation of a sample file - this is what we'll parse
|
||||
SampleFileParser parser;
|
||||
try {
|
||||
parser = (SampleFileParser) yaml.load(reader);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
|
||||
}
|
||||
|
||||
// check to see which validation options were built into the file
|
||||
boolean restrictProperties = parser.getAllowedProperties() != null;
|
||||
boolean restrictRelationships = parser.getAllowedRelationships() != null;
|
||||
boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;
|
||||
|
||||
// propertyValues stores the values that are allowed for a given property
|
||||
HashMap<String, HashSet> propertyValues = null;
|
||||
if (restrictPropertyValues) {
|
||||
propertyValues = new HashMap<String, HashSet>();
|
||||
for (PropertyDefinition def : parser.getPropertyDefinitions()) {
|
||||
HashSet<String> set = new HashSet<String>();
|
||||
for (String value : def.getValues()) {
|
||||
set.add(value);
|
||||
}
|
||||
propertyValues.put(def.getProperty(), set);
|
||||
}
|
||||
}
|
||||
|
||||
// make sure the aliases are valid
|
||||
validateAliases(parser);
|
||||
|
||||
// loop through each sample in the file - a SampleParser stores an object that will become a Sample
|
||||
for (SampleParser sampleParser : parser.getSamples()) {
|
||||
|
||||
try {
|
||||
// step 1: add the sample if it doesn't already exist
|
||||
Sample sample = getSampleById(sampleParser.getId());
|
||||
if (sample == null) {
|
||||
sample = new Sample(sampleParser.getId());
|
||||
}
|
||||
addSample(sample);
|
||||
sample.setSampleFileEntry(true);
|
||||
|
||||
// step 2: add the properties
|
||||
if (sampleParser.getProperties() != null) {
|
||||
for (String property : sampleParser.getProperties().keySet()) {
|
||||
|
||||
// check that property is allowed
|
||||
if (restrictProperties) {
|
||||
if (!isPropertyValid(property, parser.getAllowedProperties())) {
|
||||
throw new StingException(property + " is an invalid property. It is not included in the list " +
|
||||
"of allowed properties.");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that the value is allowed
|
||||
if (restrictPropertyValues) {
|
||||
if (!isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
|
||||
throw new StingException("The value of property '" + property + "' is invalid. " +
|
||||
"It is not included in the list of allowed values for this property.");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that there isn't already a conflicting property there
|
||||
if (sample.getProperty(property) != null &&
|
||||
sample.getProperty(property) != sampleParser.getProperties().get(property))
|
||||
{
|
||||
throw new StingException(property + " is a conflicting property!");
|
||||
}
|
||||
|
||||
// checks are passed - now add the property!
|
||||
saveProperty(sample, property, sampleParser.getProperties().get(property));
|
||||
}
|
||||
}
|
||||
|
||||
// step 3: add the relationships
|
||||
if (sampleParser.getRelationships() != null) {
|
||||
for (String relationship : sampleParser.getRelationships().keySet()) {
|
||||
String relativeId = sampleParser.getRelationships().get(relationship);
|
||||
if (relativeId == null) {
|
||||
throw new StingException("The relationship cannot be null");
|
||||
}
|
||||
|
||||
// first check that it's not invalid
|
||||
if (restrictRelationships) {
|
||||
if (!isRelationshipValid(relationship, parser.getAllowedRelationships())) {
|
||||
throw new StingException(relationship + " is an invalid relationship");
|
||||
}
|
||||
}
|
||||
|
||||
// next check that there isn't already a conflicting property there
|
||||
if (sample.getRelationship(relationship) != null) {
|
||||
if (sample.getRelationship(relationship).getId() != sampleParser.getProperties().get(relationship)) {
|
||||
throw new StingException(relationship + " is a conflicting relationship!");
|
||||
}
|
||||
// if the relationship is already set - and consistent with what we're reading now - no need to continue
|
||||
else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// checks are passed - now save the relationship
|
||||
saveRelationship(sample, relationship, relativeId);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
throw new StingException("An error occurred while loading this sample from the sample file: " +
|
||||
sampleParser.getId(), e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
|
||||
|
||||
// if the property values weren't specified for this property, then any value is okay
|
||||
if (!valuesList.containsKey(key)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// if this property has enumerated values, it must be a string
|
||||
else if (value.getClass() != String.class)
|
||||
return false;
|
||||
|
||||
// is the value specified or not?
|
||||
else if (!valuesList.get(key).contains(value))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Makes sure that the aliases are valid
|
||||
* Checks that 1) no string is used as both a main ID and an alias;
|
||||
* 2) no alias is used more than once
|
||||
* @param parser
|
||||
*/
|
||||
private void validateAliases(SampleFileParser parser) {
|
||||
|
||||
// no aliases sure validate
|
||||
if (parser.getSampleAliases() == null)
|
||||
return;
|
||||
|
||||
HashSet<String> mainIds = new HashSet<String>();
|
||||
HashSet<String> otherIds = new HashSet<String>();
|
||||
|
||||
for (SampleAlias sampleAlias : parser.getSampleAliases()) {
|
||||
mainIds.add(sampleAlias.getMainId());
|
||||
for (String otherId : sampleAlias.getOtherIds()) {
|
||||
if (mainIds.contains(otherId))
|
||||
throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
|
||||
"be both a main ID and an other ID", otherId));
|
||||
|
||||
if (!otherIds.add(otherId))
|
||||
throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
|
||||
"alias more than once.", otherId));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isPropertyValid(String property, String[] allowedProperties) {
|
||||
|
||||
// is it a special property that is always allowed?
|
||||
for (String allowedProperty : specialProperties) {
|
||||
if (property.equals(allowedProperty))
|
||||
return true;
|
||||
}
|
||||
|
||||
// is it in the allowed properties list?
|
||||
for (String allowedProperty : allowedProperties) {
|
||||
if (property.equals(allowedProperty))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
|
||||
|
||||
// is it a special relationship that is always allowed?
|
||||
for (String allowedRelationship : specialRelationships) {
|
||||
if (relationship.equals(allowedRelationship))
|
||||
return true;
|
||||
}
|
||||
|
||||
// is it in the allowed properties list?
|
||||
for (String allowedRelationship : allowedRelationships) {
|
||||
if (relationship.equals(allowedRelationship))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a property as the correct type
|
||||
* @param key property key
|
||||
* @param value property value, as read from YAML parser
|
||||
* @return property value to be stored
|
||||
*/
|
||||
private void saveProperty(Sample sample, String key, Object value) {
|
||||
|
||||
// convert gender to the right type, if it was stored as a String
|
||||
if (key.equals("gender")) {
|
||||
if (((String) value).toLowerCase().equals("male")) {
|
||||
value = Sample.Gender.MALE;
|
||||
}
|
||||
else if (((String) value).toLowerCase().equals("female")) {
|
||||
value = Sample.Gender.FEMALE;
|
||||
}
|
||||
else if (((String) value).toLowerCase().equals("unknown")) {
|
||||
value = Sample.Gender.UNKNOWN;
|
||||
}
|
||||
else if (value != null) {
|
||||
throw new StingException("'gender' property must be male, female, or unknown.");
|
||||
}
|
||||
}
|
||||
try {
|
||||
sample.setProperty(key, value);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not save property " + key, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Saves a relationship as the correct type
|
||||
* @param key relationship key
|
||||
* @param relativeId sample ID string of the relative
|
||||
* @return relationship value to be stored
|
||||
*/
|
||||
private void saveRelationship(Sample sample, String key, String relativeId) {
|
||||
|
||||
// get the reference that we'll store as the value
|
||||
Sample relative = getSampleById(relativeId);
|
||||
|
||||
// create sample object for the relative, if necessary
|
||||
if (relative == null) {
|
||||
relative = new Sample(relativeId);
|
||||
addSample(relative);
|
||||
}
|
||||
sample.setRelationship(key, relative);
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
|
||||
* Filter a sample name in case it is an alias
|
||||
* @param sampleId to be filtered
|
||||
* @return ID of sample that stores data for this alias
|
||||
*/
|
||||
private String aliasFilter(String sampleId) {
|
||||
if (!sampleAliases.containsKey(sampleId))
|
||||
return sampleId;
|
||||
else
|
||||
return sampleAliases.get(sampleId);
|
||||
}
|
||||
|
||||
/**
 * Add a sample to the collection, keyed by the sample's own ID.
 * @param sample to be added
 */
private void addSample(Sample sample) {
    samples.put(sample.getId(), sample);
}
|
||||
|
||||
/**
|
||||
* Check if sample with this ID exists
|
||||
* Note that this will return true if name passed in is an alias
|
||||
* @param id ID of sample to be checked
|
||||
* @return true if sample exists; false if not
|
||||
*/
|
||||
public boolean hasSample(String id) {
|
||||
return samples.get(aliasFilter(id)) != null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id
|
||||
* @return sample Object with this ID
|
||||
*/
|
||||
public Sample getSampleById(String id) {
|
||||
return samples.get(aliasFilter(id));
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample for a given read group
|
||||
* Must first look up ID for read group
|
||||
* @param readGroup of sample
|
||||
* @return sample object with ID from the read group
|
||||
*/
|
||||
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
|
||||
String nameFromReadGroup = readGroup.getSample();
|
||||
return getSampleById(nameFromReadGroup);
|
||||
}
|
||||
|
||||
/**
 * Get the sample for a given read.
 * Looks up the read's read group, then the sample bound to that read group.
 * @param read read whose sample is wanted
 * @return sample object of this read
 */
public Sample getSampleByRead(SAMRecord read) {
    return getSampleByReadGroup(read.getReadGroup());
}
|
||||
|
||||
/**
 * Get the number of sample objects, including the null-keyed placeholder sample
 * created by the default constructor.
 * @return size of the samples map
 */
public int sampleCount() {
    return samples.size();
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
|
||||
* @param familyId
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
HashSet<Sample> familyMembers = new HashSet<Sample>();
|
||||
|
||||
for (Sample sample : samples.values()) {
|
||||
if (sample.getFamilyId() != null) {
|
||||
if (sample.getFamilyId().equals(familyId))
|
||||
familyMembers.add(sample);
|
||||
}
|
||||
}
|
||||
return familyMembers;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
HashSet<Sample> children = new HashSet<Sample>();
|
||||
for (Sample familyMember : getFamily(sample.getFamilyId())) {
|
||||
if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
|
||||
children.add(familyMember);
|
||||
}
|
||||
}
|
||||
return children;
|
||||
}
|
||||
|
||||
public Set<Sample> getSamples() {
|
||||
HashSet<Sample> set = new HashSet<Sample>();
|
||||
set.addAll(samples.values());
|
||||
return set;
|
||||
}
|
||||
|
||||
/**
|
||||
* Takes a collection of sample names and returns their corresponding sample objects
|
||||
* Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
|
||||
* @param sampleNameList Set of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
for (String name : sampleNameList) {
|
||||
try {
|
||||
samples.add(getSampleById(name));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not get sample with the following ID: " + name, e);
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have any value (which could be null) for a given property
|
||||
* @param key Property key
|
||||
* @return Set of samples with the property
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key) {
|
||||
HashSet<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample s : samples.values()) {
|
||||
if (s.hasProperty(key))
|
||||
toReturn.add(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of samples that have a property with a certain value
|
||||
* Value must be a string for now - could add a similar method for matching any objects in the future
|
||||
*
|
||||
* @param key Property key
|
||||
* @param value String property value
|
||||
* @return Set of samples that match key and value
|
||||
*/
|
||||
public Set<Sample> getSamplesWithProperty(String key, String value) {
|
||||
Set<Sample> toReturn = getSamplesWithProperty(key);
|
||||
for (Sample s : toReturn) {
|
||||
if (!s.getProperty(key).equals(value))
|
||||
toReturn.remove(s);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
public Sample getOrCreateSample(String id) {
|
||||
Sample sample = getSampleById(id);
|
||||
if (sample == null) {
|
||||
sample = new Sample(id);
|
||||
addSample(sample);
|
||||
}
|
||||
return sample;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all samples that were referenced in the SAM file
|
||||
*/
|
||||
public Set<Sample> getSAMFileSamples() {
|
||||
Set<Sample> toReturn = new HashSet<Sample>();
|
||||
for (Sample sample : samples.values()) {
|
||||
if (sample.hasSAMFileEntry())
|
||||
toReturn.add(sample);
|
||||
}
|
||||
return toReturn;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a set of sample objects for the sample names in a variant context
|
||||
*
|
||||
* @param context Any variant context
|
||||
* @return a set of the sample objects
|
||||
*/
|
||||
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
|
||||
Set<Sample> samples = new HashSet<Sample>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
samples.add(getOrCreateSample(sampleName));
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Return a subcontext restricted to samples with a given property key/value
|
||||
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
|
||||
* @param context VariantContext to filter
|
||||
* @param key property key
|
||||
* @param value property value (must be string)
|
||||
* @return subcontext
|
||||
*/
|
||||
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
|
||||
|
||||
Set<String> samplesWithProperty = new HashSet<String>();
|
||||
for (String sampleName : context.getSampleNames()) {
|
||||
Sample s = samples.get(sampleName);
|
||||
if (s != null && s.hasProperty(key) && s.getProperty(key).equals(value))
|
||||
samplesWithProperty.add(sampleName);
|
||||
}
|
||||
Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
|
||||
return context.subContextFromGenotypes(genotypes.values());
|
||||
}
|
||||
|
||||
public static SampleDataSource createEmptyDataSource() {
|
||||
SAMFileHeader header = new SAMFileHeader();
|
||||
return new SampleDataSource(header, null);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: brett
|
||||
* Date: Aug 12, 2010
|
||||
* Time: 1:30:44 PM
|
||||
*/
|
||||
public class SampleFileParser {
|
||||
|
||||
private SampleAlias[] sampleAliases;
|
||||
|
||||
private String[] allowedProperties;
|
||||
|
||||
private String[] allowedRelationships;
|
||||
|
||||
private PropertyDefinition[] propertyDefinitions;
|
||||
|
||||
private SampleParser[] samples;
|
||||
|
||||
public PropertyDefinition[] getPropertyDefinitions() {
|
||||
return propertyDefinitions;
|
||||
}
|
||||
|
||||
public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
|
||||
this.propertyDefinitions = propertyDefinitions;
|
||||
}
|
||||
|
||||
public SampleFileParser() {
|
||||
|
||||
}
|
||||
|
||||
public String[] getAllowedProperties() {
|
||||
return allowedProperties;
|
||||
}
|
||||
|
||||
public void setAllowedProperties(String[] allowedProperties) {
|
||||
this.allowedProperties = allowedProperties;
|
||||
}
|
||||
|
||||
public SampleParser[] getSamples() {
|
||||
return samples;
|
||||
}
|
||||
|
||||
public void setSamples(SampleParser[] samples) {
|
||||
this.samples = samples;
|
||||
}
|
||||
|
||||
public String[] getAllowedRelationships() {
|
||||
return allowedRelationships;
|
||||
}
|
||||
|
||||
public void setAllowedRelationships(String[] allowedRelationships) {
|
||||
this.allowedRelationships = allowedRelationships;
|
||||
}
|
||||
|
||||
public SampleAlias[] getSampleAliases() {
|
||||
return sampleAliases;
|
||||
}
|
||||
|
||||
public void setSampleAliases(SampleAlias[] sampleAliases) {
|
||||
this.sampleAliases = sampleAliases;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.datasources.sample;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
 * Bean for a single sample entry in a YAML sample file: an ID plus optional
 * property and relationship maps. Populated reflectively by SnakeYAML.
 */
public class SampleParser {

    private String id;

    private HashMap<String, Object> properties;

    private HashMap<String, String> relationships;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }

    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }

}
|
||||
|
|
@ -85,12 +85,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
|
|||
*/
|
||||
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
    super(engine, walker, reads, reference, rods);

    // validate the arguments before allocating the thread pool, so the unsupported-argument
    // path does not leave a fixed pool of live threads behind (the original allocated first)
    if (engine.getArguments().processingTrackerFile != null) {
        throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
    }

    this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
}
|
||||
|
||||
public Object execute( Walker walker, ShardStrategy shardStrategy ) {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
|
|||
import org.broadinstitute.sting.gatk.io.OutputTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
|
||||
import java.util.Collection;
|
||||
|
||||
|
|
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
|
|||
traversalEngine.startTimersIfNecessary();
|
||||
if(shard.getShardType() == Shard.ShardType.LOCUS) {
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
|
||||
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
|
||||
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
|
||||
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());
|
||||
|
|
|
|||
|
|
@ -62,16 +62,17 @@ public class ShardTraverser implements Callable {
|
|||
|
||||
Object accumulator = walker.reduceInit();
|
||||
LocusWalker lWalker = (LocusWalker)walker;
|
||||
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
|
||||
ShardDataProvider dataProvider = null;
|
||||
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
|
||||
microScheduler.getReadIterator(shard),
|
||||
shard.getGenomeLocs(),
|
||||
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
|
||||
|
||||
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
|
||||
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
|
||||
final ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
|
||||
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
|
||||
dataProvider.close();
|
||||
}
|
||||
|
||||
if (dataProvider != null) dataProvider.close();
|
||||
windowMaker.close();
|
||||
outputMergeTask = outputTracker.closeStorage();
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
|
|||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
|
||||
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
|
||||
|
|
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
|
|||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.NoSuchElementException;
|
||||
|
|
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
|
|||
* the given intervals.
|
||||
* @param iterator The data source for this window.
|
||||
* @param intervals The set of intervals over which to traverse.
|
||||
* @param sampleData SampleDataSource that we can reference reads with
|
||||
* @param sampleNames The complete set of sample names in the reads in shard
|
||||
*/
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) {
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
|
||||
this.sourceInfo = shard.getReadProperties();
|
||||
this.readIterator = iterator;
|
||||
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
|
||||
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
|
||||
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
|
||||
}
|
||||
|
||||
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
|
||||
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
|
||||
}
|
||||
|
||||
public Iterator<WindowMakerIterator> iterator() {
|
||||
return this;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.filters;
|
|||
import net.sf.samtools.SAMFileHeader;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
/**
|
||||
* Filter out malformed reads.
|
||||
|
|
@ -37,14 +39,25 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
|||
*/
|
||||
public class MalformedReadFilter extends ReadFilter {
|
||||
private SAMFileHeader header;
|
||||
|
||||
|
||||
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
|
||||
boolean filterMismatchingBaseAndQuals = false;
|
||||
|
||||
@Override
|
||||
public void initialize(GenomeAnalysisEngine engine) {
|
||||
this.header = engine.getSAMFileHeader();
|
||||
}
|
||||
|
||||
public boolean filterOut(SAMRecord read) {
|
||||
return !checkInvalidAlignmentStart(read) ||
|
||||
// slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided
|
||||
if (!checkMismatchingBasesAndQuals(read)) {
|
||||
if (!filterMismatchingBaseAndQuals)
|
||||
throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals");
|
||||
else
|
||||
return true;
|
||||
}
|
||||
|
||||
return !checkInvalidAlignmentStart(read) ||
|
||||
!checkInvalidAlignmentEnd(read) ||
|
||||
!checkAlignmentDisagreesWithHeader(this.header,read) ||
|
||||
!checkCigarDisagreesWithAlignment(read);
|
||||
|
|
@ -108,4 +121,13 @@ public class MalformedReadFilter extends ReadFilter {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if the read has the same number of bases and base qualities
|
||||
* @param read the read to validate
|
||||
* @return true if they have the same number. False otherwise.
|
||||
*/
|
||||
private static boolean checkMismatchingBasesAndQuals(SAMRecord read) {
|
||||
return (read.getReadLength() == read.getBaseQualities().length);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,23 @@
|
|||
package org.broadinstitute.sting.gatk.filters;
|
||||
|
||||
import net.sf.samtools.Cigar;
|
||||
import net.sf.samtools.CigarElement;
|
||||
import net.sf.samtools.CigarOperator;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: chartl
|
||||
* Date: 9/19/11
|
||||
* Time: 4:09 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
public class ReadNameFilter extends ReadFilter {
|
||||
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
|
||||
private String readName;
|
||||
|
||||
public boolean filterOut(final SAMRecord rec) {
|
||||
return ! rec.getReadName().equals(readName);
|
||||
}
|
||||
}
|
||||
|
|
@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
else if ( stub.getOutputStream() != null ) {
|
||||
this.file = null;
|
||||
this.stream = stub.getOutputStream();
|
||||
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
|
||||
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
|
||||
}
|
||||
else
|
||||
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
|
||||
|
|
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
|
|||
}
|
||||
|
||||
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
|
||||
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
||||
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.io.stubs;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.broadinstitute.sting.gatk.CommandLineExecutable;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
|
|
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
|
|||
return isCompressed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the master sequence dictionary from the engine associated with this stub
|
||||
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||
* @return
|
||||
*/
|
||||
public SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||
return engine.getMasterSequenceDictionary();
|
||||
}
|
||||
|
||||
/**
|
||||
* Should we tell the VCF writer not to write genotypes?
|
||||
* @return true if the writer should not write genotypes.
|
||||
|
|
|
|||
|
|
@ -35,26 +35,23 @@ import org.broadinstitute.sting.gatk.DownsampleType;
|
|||
import org.broadinstitute.sting.gatk.DownsamplingMethod;
|
||||
import org.broadinstitute.sting.gatk.ReadProperties;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
|
||||
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.ReservoirDownsampler;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.PileupElement;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
|
||||
public class LocusIteratorByState extends LocusIterator {
|
||||
// private static long discarded_bases = 0L;
|
||||
// private static long observed_bases = 0L;
|
||||
|
||||
/** our log, which we want to capture anything from this class */
|
||||
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
|
||||
|
||||
|
|
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* Used to create new GenomeLocs.
|
||||
*/
|
||||
private final GenomeLocParser genomeLocParser;
|
||||
private final ArrayList<Sample> samples;
|
||||
private final ArrayList<String> samples;
|
||||
private final ReadStateManager readStates;
|
||||
|
||||
static private class SAMRecordState {
|
||||
|
|
@ -278,15 +275,27 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//
|
||||
// -----------------------------------------------------------------------------------------------------------------
|
||||
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
|
||||
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
|
||||
this.readInfo = readInformation;
|
||||
this.genomeLocParser = genomeLocParser;
|
||||
this.samples = new ArrayList<String>(samples);
|
||||
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
|
||||
|
||||
// get the list of samples
|
||||
this.samples = new ArrayList<Sample>(sampleData.getSamples());
|
||||
|
||||
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
|
||||
|
||||
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
|
||||
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
|
||||
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
|
||||
throw new IllegalArgumentException("samples list must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
|
||||
* for the system.
|
||||
*/
|
||||
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
|
||||
List<String> samples = new ArrayList<String>();
|
||||
samples.add(null);
|
||||
return samples;
|
||||
}
|
||||
|
||||
public Iterator<AlignmentContext> iterator() {
|
||||
|
|
@ -303,19 +312,6 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
|
||||
}
|
||||
|
||||
public void printState() {
|
||||
for(Sample sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
while(iterator.hasNext()) {
|
||||
SAMRecordState state = iterator.next();
|
||||
logger.debug(String.format("printState():"));
|
||||
SAMRecord read = state.getRead();
|
||||
int offset = state.getReadOffset();
|
||||
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private GenomeLoc getLocation() {
|
||||
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
|
||||
}
|
||||
|
|
@ -355,14 +351,14 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// In this case, the subsequent call to next() will emit the normal pileup at the current base
|
||||
// and shift the position.
|
||||
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
|
||||
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
|
||||
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
|
||||
|
||||
// get current location on the reference and decrement it by 1: the indels we just stepped over
|
||||
// are associated with the *previous* reference base
|
||||
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
|
@ -382,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
|
||||
}
|
||||
else nInsertions++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadEventStartOffset(),
|
||||
state.getEventLength(),
|
||||
state.getEventBases()) );
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
|
||||
|
||||
} else {
|
||||
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
|
||||
|
|
@ -407,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
|
||||
// about to report) only if includeReadsWithDeletionAtLoci is true.
|
||||
size++;
|
||||
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
|
||||
state.getReadOffset()-1,
|
||||
-1) // length=-1 --> noevent
|
||||
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -426,10 +417,10 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
|
||||
} else {
|
||||
GenomeLoc location = getLocation();
|
||||
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
|
||||
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
|
||||
|
||||
boolean hasBeenSampled = false;
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
|
||||
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
|
||||
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
|
||||
|
|
@ -447,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
continue;
|
||||
} else {
|
||||
//observed_bases++;
|
||||
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
|
||||
size++;
|
||||
}
|
||||
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
|
||||
size++;
|
||||
pile.add(new PileupElement(state.getRead(), -1));
|
||||
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
|
||||
nDeletions++;
|
||||
}
|
||||
|
||||
|
|
@ -495,7 +486,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
private void updateReadStates() {
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
Iterator<SAMRecordState> it = readStates.iterator(sample);
|
||||
while ( it.hasNext() ) {
|
||||
SAMRecordState state = it.next();
|
||||
|
|
@ -522,7 +513,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
private final PeekableIterator<SAMRecord> iterator;
|
||||
private final DownsamplingMethod downsamplingMethod;
|
||||
private final SamplePartitioner samplePartitioner;
|
||||
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
|
||||
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
|
||||
private final int targetCoverage;
|
||||
private int totalReadStates = 0;
|
||||
|
||||
|
|
@ -540,9 +531,9 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
|
||||
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
readStatesBySample.put(sample,new PerSampleReadStateManager());
|
||||
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
|
||||
}
|
||||
|
||||
samplePartitioner = new SamplePartitioner(readSelectors);
|
||||
|
|
@ -554,7 +545,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample The sample.
|
||||
* @return Iterator over the reads associated with that sample.
|
||||
*/
|
||||
public Iterator<SAMRecordState> iterator(final Sample sample) {
|
||||
public Iterator<SAMRecordState> iterator(final String sample) {
|
||||
return new Iterator<SAMRecordState>() {
|
||||
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
|
||||
|
||||
|
|
@ -590,7 +581,7 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample The sample.
|
||||
* @return Total number of reads in the given sample.
|
||||
*/
|
||||
public int size(final Sample sample) {
|
||||
public int size(final String sample) {
|
||||
return readStatesBySample.get(sample).size();
|
||||
}
|
||||
|
||||
|
|
@ -600,12 +591,12 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
* @param sample Sample, downsampled independently.
|
||||
* @return Integer stop of the furthest undownsampled region.
|
||||
*/
|
||||
public int getDownsamplingExtent(final Sample sample) {
|
||||
public int getDownsamplingExtent(final String sample) {
|
||||
return readStatesBySample.get(sample).getDownsamplingExtent();
|
||||
}
|
||||
|
||||
public SAMRecordState getFirst() {
|
||||
for(Sample sample: samples) {
|
||||
for(final String sample: samples) {
|
||||
PerSampleReadStateManager reads = readStatesBySample.get(sample);
|
||||
if(!reads.isEmpty())
|
||||
return reads.peek();
|
||||
|
|
@ -639,8 +630,8 @@ public class LocusIteratorByState extends LocusIterator {
|
|||
}
|
||||
samplePartitioner.complete();
|
||||
|
||||
for(Sample sample: samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
|
||||
for(final String sample: samples) {
|
||||
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
|
||||
|
||||
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
|
||||
|
||||
|
|
@ -1072,6 +1063,3 @@ class SamplePartitioner implements ReadSelector {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
|
|||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* An iterator which does post-processing of a read, including potentially wrapping
|
||||
|
|
@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
|
|||
* no next exists.
|
||||
*/
|
||||
public SAMRecord next() {
|
||||
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities);
|
||||
SAMRecord rec = wrappedIterator.next();
|
||||
|
||||
// if we are using default quals, check if we need them, and add if necessary.
|
||||
// 1. we need if reads are lacking or have incomplete quality scores
|
||||
// 2. we add if defaultBaseQualities has a positive value
|
||||
if (defaultBaseQualities >= 0) {
|
||||
byte reads [] = rec.getReadBases();
|
||||
byte quals [] = rec.getBaseQualities();
|
||||
if (quals == null || quals.length < reads.length) {
|
||||
byte new_quals [] = new byte [reads.length];
|
||||
for (int i=0; i<reads.length; i++)
|
||||
new_quals[i] = defaultBaseQualities;
|
||||
rec.setBaseQualities(new_quals);
|
||||
}
|
||||
}
|
||||
|
||||
// if we are using original quals, set them now if they are present in the record
|
||||
if ( useOriginalBaseQualities ) {
|
||||
byte[] originalQuals = rec.getOriginalBaseQualities();
|
||||
if ( originalQuals != null )
|
||||
rec.setBaseQualities(originalQuals);
|
||||
}
|
||||
|
||||
return rec;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ import org.apache.log4j.Level;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
|
||||
import org.broadinstitute.sting.gatk.walkers.Walker;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
|
@ -101,9 +100,6 @@ public class GATKRunReport {
|
|||
@Element(required = false, name = "exception")
|
||||
private final ExceptionToXML mException;
|
||||
|
||||
@Element(required = false, name = "argument_collection")
|
||||
private final GATKArgumentCollection mCollection;
|
||||
|
||||
@Element(required = true, name = "working_directory")
|
||||
private String currentPath;
|
||||
|
||||
|
|
@ -187,7 +183,6 @@ public class GATKRunReport {
|
|||
cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker);
|
||||
} catch (Exception ignore) { }
|
||||
|
||||
this.mCollection = engine.getArguments();
|
||||
walkerName = engine.getWalkerName(walker.getClass());
|
||||
svnVersion = CommandLineGATK.getVersionNumber();
|
||||
|
||||
|
|
@ -293,15 +288,16 @@ public class GATKRunReport {
|
|||
* That is, postReport() is guarenteed not to fail for any reason.
|
||||
*/
|
||||
private File postReportToLocalDisk(File rootDir) {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
try {
|
||||
String filename = getID() + ".report.xml.gz";
|
||||
File file = new File(rootDir, filename);
|
||||
postReportToFile(file);
|
||||
logger.debug("Wrote report to " + file);
|
||||
return file;
|
||||
} catch ( Exception e ) {
|
||||
// we catch everything, and no matter what eat the error
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
file.delete();
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -312,6 +308,7 @@ public class GATKRunReport {
|
|||
File localFile = postReportToLocalDisk(new File("./"));
|
||||
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
|
||||
if ( localFile != null ) { // we succeeded in creating the local file
|
||||
localFile.deleteOnExit();
|
||||
try {
|
||||
// stop us from printing the annoying, and meaningless, mime types warning
|
||||
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
|
||||
|
|
@ -336,14 +333,13 @@ public class GATKRunReport {
|
|||
//logger.info("Uploading " + localFile + " to AWS bucket");
|
||||
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
|
||||
logger.debug("Uploaded to AWS: " + s3Object);
|
||||
logger.info("Uploaded run statistics report to AWS S3");
|
||||
} catch ( S3ServiceException e ) {
|
||||
exceptDuringRunReport("S3 exception occurred", e);
|
||||
} catch ( NoSuchAlgorithmException e ) {
|
||||
exceptDuringRunReport("Couldn't calculate MD5", e);
|
||||
} catch ( IOException e ) {
|
||||
exceptDuringRunReport("Couldn't read report file", e);
|
||||
} finally {
|
||||
localFile.delete();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,130 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.indexer;
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.Tribble;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broad.tribble.index.IndexFactory;
|
||||
import org.broad.tribble.util.LittleEndianOutputStream;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.commandline.CommandLineProgram;
|
||||
import org.broadinstitute.sting.commandline.Input;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
|
||||
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
|
||||
/**
|
||||
* a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
|
||||
* in which an index is located, or if you'd like to pre-index files to save time.
|
||||
*/
|
||||
public class RMDIndexer extends CommandLineProgram {
|
||||
@Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
|
||||
File inputFileSource = null;
|
||||
|
||||
@Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
|
||||
String inputFileType = null;
|
||||
|
||||
@Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
|
||||
public File referenceFile = null;
|
||||
|
||||
@Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
|
||||
public File indexFile = null;
|
||||
|
||||
@Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
|
||||
IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;
|
||||
|
||||
private static Logger logger = Logger.getLogger(RMDIndexer.class);
|
||||
private IndexedFastaSequenceFile ref = null;
|
||||
private GenomeLocParser genomeLocParser = null;
|
||||
|
||||
@Override
|
||||
protected int execute() throws Exception {
|
||||
|
||||
|
||||
// check parameters
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
// check the input parameters
|
||||
if (referenceFile != null && !referenceFile.canRead())
|
||||
throw new IllegalArgumentException("We can't read the reference file: "
|
||||
+ referenceFile + ", check that it exists, and that you have permissions to read it");
|
||||
|
||||
|
||||
// set the index file to the default name if they didn't specify a file
|
||||
if (indexFile == null && inputFileSource != null)
|
||||
indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);
|
||||
|
||||
// check that we can create the output file
|
||||
if (indexFile == null || indexFile.exists())
|
||||
throw new IllegalArgumentException("We can't write to the index file location: "
|
||||
+ indexFile + ", the index exists");
|
||||
|
||||
logger.info(String.format("attempting to index file: %s", inputFileSource));
|
||||
logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
|
||||
logger.info(String.format("using type: %s", inputFileType));
|
||||
logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));
|
||||
|
||||
// try to index the file
|
||||
// ---------------------------------------------------------------------------------
|
||||
|
||||
// setup the reference
|
||||
ref = new CachingIndexedFastaSequenceFile(referenceFile);
|
||||
genomeLocParser = new GenomeLocParser(ref);
|
||||
|
||||
// get a track builder
|
||||
RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);
|
||||
|
||||
// find the types available to the track builders
|
||||
FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
|
||||
|
||||
// check that the type is valid
|
||||
if (descriptor == null)
|
||||
throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());
|
||||
|
||||
// create the codec
|
||||
FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
|
||||
|
||||
// check if it's a reference dependent feature codec
|
||||
if (codec instanceof ReferenceDependentFeatureCodec)
|
||||
((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);
|
||||
|
||||
// get some timing info
|
||||
long currentTime = System.currentTimeMillis();
|
||||
|
||||
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
|
||||
|
||||
// add writing of the sequence dictionary, if supplied
|
||||
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
|
||||
|
||||
// create the output stream, and write the index
|
||||
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
|
||||
index.write(stream);
|
||||
stream.close();
|
||||
|
||||
// report and exit
|
||||
logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
|
||||
return 0; // return successfully
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* the generic call execute main
|
||||
* @param argv the arguments from the command line
|
||||
*/
|
||||
public static void main(String[] argv) {
|
||||
try {
|
||||
RMDIndexer instance = new RMDIndexer();
|
||||
start(instance, argv);
|
||||
System.exit(CommandLineProgram.result);
|
||||
} catch (Exception e) {
|
||||
exitSystemWithError(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.index.Index;
|
||||
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Utilities for working with Sequence Dictionaries embedded in tribble indices
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class IndexDictionaryUtils {
|
||||
private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);
|
||||
|
||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||
|
||||
/**
|
||||
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
||||
* @param index the index file to use
|
||||
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||
*/
|
||||
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
||||
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
||||
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
||||
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
||||
Integer.valueOf(entry.getValue())));
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the sequence dictionary with the contig list; a backup approach
|
||||
* @param index the index file to use
|
||||
* @param dict the sequence dictionary to add contigs to
|
||||
* @return the filled-in sequence dictionary
|
||||
*/
|
||||
static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
||||
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
||||
if (seqNames == null) {
|
||||
return dict;
|
||||
}
|
||||
for (String name : seqNames) {
|
||||
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
||||
dict.addSequence(seq);
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
|
||||
for ( SAMSequenceRecord seq : dict.getSequences() ) {
|
||||
final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
|
||||
final String length = String.valueOf(seq.getSequenceLength());
|
||||
index.addProperty(contig,length);
|
||||
}
|
||||
}
|
||||
|
||||
public static void validateTrackSequenceDictionary(final String trackName,
|
||||
final SAMSequenceDictionary trackDict,
|
||||
final SAMSequenceDictionary referenceDict,
|
||||
final ValidationExclusion.TYPE validationExclusionType ) {
|
||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||
if (trackDict == null || trackDict.size() == 0)
|
||||
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||
else {
|
||||
Set<String> trackSequences = new TreeSet<String>();
|
||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.tracks;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import net.sf.samtools.SAMSequenceRecord;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureSource;
|
||||
|
|
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
|
|||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
|
||||
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
|
||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
||||
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
|
|||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
 * @author aaron
 *
|
||||
* Class RMDTrackBuilder
|
||||
*
|
||||
|
|
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
|
||||
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
|
||||
|
||||
// a constant we use for marking sequence dictionary entries in the Tribble index property list
|
||||
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
|
||||
|
||||
// private sequence dictionary we use to set our tracks with
|
||||
private SAMSequenceDictionary dict = null;
|
||||
|
||||
|
|
@ -150,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
|
||||
|
||||
if (descriptor == null)
|
||||
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
|
||||
throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
|
||||
|
||||
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
|
||||
}
|
||||
|
|
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
|
||||
catch (ReviewedStingException e) { }
|
||||
|
||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||
|
||||
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
|
||||
if (sequenceDictionary.size() == 0 && dict != null) {
|
||||
File indexFile = Tribble.indexFile(inputFile);
|
||||
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
|
||||
sequenceDictionary = getSequenceDictionaryFromProperties(index);
|
||||
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
|
||||
try { // re-write the index
|
||||
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
||||
} catch (IOException e) {
|
||||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||
}
|
||||
|
||||
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
|
||||
}
|
||||
|
||||
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
|
||||
|
|
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
|
|||
// this can take a while, let them know what we're doing
|
||||
logger.info("Creating Tribble index in memory for file " + inputFile);
|
||||
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
|
||||
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
|
||||
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
|
||||
return idx;
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
// static functions to work with the sequence dictionaries of indexes
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
|
||||
* @param index the index file to use
|
||||
* @return a SAMSequenceDictionary if available, null if unavailable
|
||||
*/
|
||||
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
|
||||
SAMSequenceDictionary dict = new SAMSequenceDictionary();
|
||||
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
|
||||
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
|
||||
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
|
||||
Integer.valueOf(entry.getValue())));
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* create the sequence dictionary with the contig list; a backup approach
|
||||
* @param index the index file to use
|
||||
* @param dict the sequence dictionary to add contigs to
|
||||
* @return the filled-in sequence dictionary
|
||||
*/
|
||||
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
|
||||
LinkedHashSet<String> seqNames = index.getSequenceNames();
|
||||
if (seqNames == null) {
|
||||
return dict;
|
||||
}
|
||||
for (String name : seqNames) {
|
||||
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
|
||||
dict.addSequence(seq);
|
||||
}
|
||||
return dict;
|
||||
}
|
||||
|
||||
/**
|
||||
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
|
||||
* (that each contig in the index is in the sequence dictionary).
|
||||
* @param inputFile for proper error message formatting.
|
||||
* @param dict the sequence dictionary
|
||||
* @param index the index file
|
||||
* @param indexFile the index file
|
||||
* @param rewriteIndex should we rewrite the index when we're done?
|
||||
*
|
||||
*/
|
||||
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
|
||||
if (dict == null) return;
|
||||
|
||||
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
|
||||
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
|
||||
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
|
||||
|
||||
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
|
||||
for (SAMSequenceRecord seq : currentDict.getSequences()) {
|
||||
if (dict.getSequence(seq.getSequenceName()) == null)
|
||||
continue;
|
||||
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
|
||||
}
|
||||
// re-write the index
|
||||
if (rewriteIndex) try {
|
||||
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
|
||||
} catch (IOException e) {
|
||||
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
|
||||
}
|
||||
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
|
||||
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
|
||||
|
||||
// actually update the dictionary in the index
|
||||
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
|
||||
}
|
||||
|
||||
|
||||
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
|
||||
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
|
||||
if (trackDict == null || trackDict.size() == 0)
|
||||
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
|
||||
else {
|
||||
Set<String> trackSequences = new TreeSet<String>();
|
||||
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
|
||||
trackSequences.add(dictionaryEntry.getSequenceName());
|
||||
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
|
||||
}
|
||||
public void validateTrackSequenceDictionary(final String trackName,
|
||||
final SAMSequenceDictionary trackDict,
|
||||
final SAMSequenceDictionary referenceDict ) {
|
||||
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,57 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.refdata.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
* @author aaron
|
||||
*
|
||||
* Class RMDIntervalGenerator
|
||||
*
|
||||
* Creates an interval list, given an RMDTrack
|
||||
*/
|
||||
public class RMDIntervalGenerator {
|
||||
public ReferenceOrderedDataSource dataSource;
|
||||
|
||||
/**
|
||||
* create a interval representation of a ROD track
|
||||
* @param dataSource the track
|
||||
*/
|
||||
public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
|
||||
if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
|
||||
this.dataSource = dataSource;
|
||||
}
|
||||
|
||||
/**
|
||||
* create a genome location list from the interval track
|
||||
* @return a list of genome locations
|
||||
*/
|
||||
public List<GenomeLoc> toGenomeLocList() {
|
||||
Iterator<RODRecordList> iter = dataSource.seek((GenomeLoc)null);
|
||||
List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
|
||||
while (iter.hasNext()) {
|
||||
RODRecordList feature = iter.next();
|
||||
GenomeLoc loc = feature.getLocation();
|
||||
if (loc != null) locations.add(loc);
|
||||
}
|
||||
return locations;
|
||||
}
|
||||
|
||||
/**
|
||||
* return a map of reference meta data track names to RODS
|
||||
* @param sources the reference ordered data sources to get the names from
|
||||
* @return a map of reference meta data names to RODS
|
||||
*/
|
||||
public static Map<String,ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
|
||||
// get a list of the current rod names we're working with
|
||||
Map<String,ReferenceOrderedDataSource> rodNames = new HashMap<String,ReferenceOrderedDataSource>();
|
||||
for (ReferenceOrderedDataSource rod : sources) {
|
||||
rodNames.put(rod.getName(),rod);
|
||||
}
|
||||
return rodNames;
|
||||
}
|
||||
}
|
||||
|
|
@ -24,12 +24,14 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.report;
|
||||
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Tracks a linked list of GATKReportColumn in order by name.
|
||||
*/
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
||||
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
|
||||
private List<String> columnNames = new ArrayList<String>();
|
||||
|
||||
/**
|
||||
|
|
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
|
|||
columnNames.add(key);
|
||||
return super.put(key, value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<GATKReportColumn> iterator() {
|
||||
return new Iterator<GATKReportColumn>() {
|
||||
int offset = 0;
|
||||
public boolean hasNext() { return offset < columnNames.size() ; }
|
||||
public GATKReportColumn next() { return getByIndex(offset++); }
|
||||
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -286,6 +286,10 @@ public class GATKReportTable {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean containsKey(Object primaryKey) {
|
||||
return primaryKeyColumn.contains(primaryKey);
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the value for a given position in the table
|
||||
*
|
||||
|
|
|
|||
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
 * Categorical sample trait for association and analysis
 *
 * Samples can have unknown status, be affected or unaffected by the
 * categorical trait, or they can be marked as actually having an
 * other trait value (stored in an associated value in the Sample class)
 *
 * @author Mark DePristo
 * @since Sept. 2011
 */
public enum Affection {
    /** The affection status of the sample has not been established. */
    UNKNOWN,

    /** The sample suffers from the disease. */
    AFFECTED,

    /** The sample is known to be unaffected by the disease. */
    UNAFFECTED,

    /** An "other" trait: the trait's actual value is an arbitrary string stored elsewhere. */
    OTHER
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
 * ENUM of possible human genders: male, female, or unknown
 */
public enum Gender {
    /** Sample is male. */
    MALE,
    /** Sample is female. */
    FEMALE,
    /** The sex of the sample could not be determined. */
    UNKNOWN
}
|
||||
|
|
@ -0,0 +1,310 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Reads PED file-formatted tabular text files
|
||||
*
|
||||
* See http://www.broadinstitute.org/mpg/tagger/faq.html
|
||||
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
|
||||
*
|
||||
* The "ped" file format refers to the widely-used format for linkage pedigree data.
|
||||
* Each line describes a single (diploid) individual in the following format:
|
||||
*
|
||||
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
|
||||
*
|
||||
* If your data lacks pedigree information (for example, unrelated case/control individuals),
|
||||
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
|
||||
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
|
||||
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
|
||||
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
|
||||
* Check out the information at the PLINK website on the "ped" file format.
|
||||
*
|
||||
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
|
||||
* Family ID
|
||||
* Individual ID
|
||||
* Paternal ID
|
||||
* Maternal ID
|
||||
* Sex (1=male; 2=female; other=unknown)
|
||||
* Phenotype
|
||||
*
|
||||
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
|
||||
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
|
||||
* quantitative trait or an affection status column: PLINK will automatically detect which type
|
||||
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
|
||||
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
|
||||
* and are actually representing these values as strings instead of doubles
|
||||
*
|
||||
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
|
||||
* not a comma, i.e. 2.394 not 2,394
|
||||
*
|
||||
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
|
||||
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
|
||||
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
|
||||
* and an error message will arise if an analysis that uses family information is requested and an
|
||||
* individual of 'unknown' sex is specified as a father or mother.
|
||||
*
|
||||
*
|
||||
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
|
||||
* line will be ignored. Do not start any family IDs with this character therefore.
|
||||
*
|
||||
* Affection status, by default, should be coded:
|
||||
* -9 missing
|
||||
* 0 missing
|
||||
* 1 unaffected
|
||||
* 2 affected
|
||||
*
|
||||
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
|
||||
* plink --file mydata --1 which will specify a disease phenotype coded:
|
||||
*
|
||||
* -9 missing
|
||||
* 0 unaffected
|
||||
* 1 affected
|
||||
*
|
||||
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
|
||||
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
|
||||
*
|
||||
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
|
||||
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
|
||||
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
|
||||
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
|
||||
*
|
||||
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
|
||||
*
|
||||
* FAM001 1 0 0 1 2 A A G G A C
|
||||
* FAM001 2 0 0 1 2 A A A G 0 0
|
||||
* ...
|
||||
*
|
||||
* Note that the GATK does not support genotypes in a PED file.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2011
|
||||
*/
|
||||
public class PedReader {
|
||||
private static Logger logger = Logger.getLogger(PedReader.class);
|
||||
final static private Set<String> CATAGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
|
||||
final static private String commentMarker = "#";
|
||||
|
||||
/**
|
||||
* An enum that specifies which, if any, of the standard PED fields are
|
||||
* missing from the input records. For example, suppose we have the full record:
|
||||
*
|
||||
* "fam1 kid dad mom 1 2"
|
||||
*
|
||||
* indicating a male affected child. This can be parsed with the -ped x.ped argument
|
||||
* to the GATK. Suppose we only have:
|
||||
*
|
||||
* "fam1 kid 1"
|
||||
*
|
||||
* we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
|
||||
*/
|
||||
public enum MissingPedField {
|
||||
/**
|
||||
* The PED records do not have the first (FAMILY_ID) argument. The family id
|
||||
* will be set to null / empty.
|
||||
*/
|
||||
NO_FAMILY_ID,
|
||||
|
||||
/**
|
||||
* The PED records do not have either the paternal or maternal IDs, so
|
||||
* the corresponding IDs are set to null.
|
||||
*/
|
||||
NO_PARENTS,
|
||||
|
||||
/**
|
||||
* The PED records do not have the GENDER field, so the sex of each
|
||||
* sample will be set to UNKNOWN.
|
||||
*/
|
||||
NO_SEX,
|
||||
|
||||
/**
|
||||
* The PED records do not have the PHENOTYPE field, so the phenotype
|
||||
* of each sample will be set to UNKNOWN.
|
||||
*/
|
||||
NO_PHENOTYPE
|
||||
}
|
||||
|
||||
protected enum Field {
|
||||
FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
|
||||
}
|
||||
|
||||
// phenotype
|
||||
private final static String MISSING_VALUE1 = "-9";
|
||||
private final static String MISSING_VALUE2 = "0";
|
||||
private final static String PHENOTYPE_UNAFFECTED = "1";
|
||||
private final static String PHENOTYPE_AFFECTED = "2";
|
||||
|
||||
// Sex
|
||||
private final static String SEX_MALE = "1";
|
||||
private final static String SEX_FEMALE = "2";
|
||||
// other=unknown
|
||||
|
||||
public PedReader() { }
|
||||
|
||||
public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
|
||||
logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
|
||||
return parse(new FileReader(source), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
|
||||
logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
|
||||
return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
|
||||
}
|
||||
|
||||
public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
|
||||
final List<String> lines = new XReadLines(reader).readLines();
|
||||
|
||||
// What are the record offsets?
|
||||
final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
|
||||
final int samplePos = familyPos + 1;
|
||||
final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
|
||||
final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
|
||||
final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
|
||||
final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
|
||||
final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;
|
||||
|
||||
// go through once and determine properties
|
||||
int lineNo = 1;
|
||||
boolean isQT = false;
|
||||
final List<String[]> splits = new ArrayList<String[]>(lines.size());
|
||||
for ( final String line : lines ) {
|
||||
if ( line.startsWith(commentMarker)) continue;
|
||||
if ( line.trim().equals("") ) continue;
|
||||
|
||||
final String[] parts = line.split("\\s+");
|
||||
|
||||
if ( parts.length != nExpectedFields )
|
||||
throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
isQT = isQT || ! CATAGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
|
||||
}
|
||||
|
||||
splits.add(parts);
|
||||
lineNo++;
|
||||
}
|
||||
logger.info("Phenotype is other? " + isQT);
|
||||
|
||||
// now go through and parse each record
|
||||
lineNo = 1;
|
||||
final List<Sample> samples = new ArrayList<Sample>(splits.size());
|
||||
for ( final String[] parts : splits ) {
|
||||
String familyID = null, individualID, paternalID = null, maternalID = null;
|
||||
Gender sex = Gender.UNKNOWN;
|
||||
String quantitativePhenotype = Sample.UNSET_QT;
|
||||
Affection affection = Affection.UNKNOWN;
|
||||
|
||||
if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
|
||||
individualID = parts[samplePos];
|
||||
if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
|
||||
if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);
|
||||
|
||||
if ( sexPos != -1 ) {
|
||||
if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
|
||||
else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
|
||||
else sex = Gender.UNKNOWN;
|
||||
}
|
||||
|
||||
if ( phenotypePos != -1 ) {
|
||||
if ( isQT ) {
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) )
|
||||
affection = Affection.UNKNOWN;
|
||||
else {
|
||||
affection = Affection.OTHER;
|
||||
quantitativePhenotype = parts[phenotypePos];
|
||||
}
|
||||
} else {
|
||||
if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
|
||||
else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
|
||||
else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
|
||||
}
|
||||
}
|
||||
|
||||
final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
|
||||
samples.add(s);
|
||||
sampleDB.addSample(s);
|
||||
lineNo++;
|
||||
}
|
||||
|
||||
for ( final Sample sample : new ArrayList<Sample>(samples) ) {
|
||||
Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
|
||||
if ( dad != null ) samples.add(dad);
|
||||
|
||||
Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
|
||||
if ( mom != null ) samples.add(mom);
|
||||
}
|
||||
|
||||
return samples;
|
||||
}
|
||||
|
||||
private final static String maybeMissing(final String string) {
|
||||
if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
|
||||
return null;
|
||||
else
|
||||
return string;
|
||||
}
|
||||
|
||||
private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
|
||||
if ( id != null && sampleDB.getSample(id) == null ) {
|
||||
Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
|
||||
sampleDB.addSample(s);
|
||||
return s;
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a list of tags from the command line, assuming it comes from the GATK Engine
|
||||
* tags, and returns the corresponding EnumSet.
|
||||
*
|
||||
* @param arg the actual engine arg, used for the UserException if there's an error
|
||||
* @param tags a list of string tags that should be converted to the MissingPedField value
|
||||
* @return
|
||||
*/
|
||||
public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
|
||||
final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);
|
||||
|
||||
for ( final String tag : tags ) {
|
||||
try {
|
||||
missingFields.add(MissingPedField.valueOf(tag));
|
||||
} catch ( IllegalArgumentException e ) {
|
||||
throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + MissingPedField.values());
|
||||
}
|
||||
}
|
||||
|
||||
return missingFields;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,41 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
/**
 * Controls how strictly pedigree information is checked against the samples
 * present in the VCF/BAM data sources.
 */
public enum PedigreeValidationType {
    /**
     * Requires that, if a pedigree file is provided at all, every sample in the VCF or
     * BAM files has a corresponding entry in the pedigree file(s).
     */
    STRICT,

    /**
     * Do not enforce any overlap between the VCF/BAM samples and the pedigree data
     * */
    SILENT
}
|
||||
|
|
@ -0,0 +1,222 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
|
||||
final private String familyID, paternalID, maternalID;
|
||||
final private Gender gender;
|
||||
final private String otherPhenotype;
|
||||
final private Affection affection;
|
||||
final private String ID;
|
||||
final private SampleDB infoDB;
|
||||
final private Map<String, Object> properties = new HashMap<String, Object>();
|
||||
|
||||
public final static String UNSET_QT = null;
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection, final String otherPhenotype) {
|
||||
this.familyID = familyID;
|
||||
this.paternalID = paternalID;
|
||||
this.maternalID = maternalID;
|
||||
this.gender = gender;
|
||||
this.otherPhenotype = otherPhenotype;
|
||||
this.affection = affection;
|
||||
this.ID = ID;
|
||||
this.infoDB = infoDB;
|
||||
}
|
||||
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection, final String otherPhenotype) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
|
||||
}
|
||||
|
||||
protected Sample(final String ID,
|
||||
final String familyID, final String paternalID, final String maternalID,
|
||||
final Gender gender, final Affection affection) {
|
||||
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
|
||||
}
|
||||
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB,
|
||||
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
|
||||
this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
|
||||
this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
|
||||
}
|
||||
|
||||
public Sample(String id, SampleDB infoDB) {
|
||||
this(id, infoDB, null, null, null,
|
||||
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------------------
|
||||
//
|
||||
// standard property getters
|
||||
//
|
||||
// -------------------------------------------------------------------------------------
|
||||
|
||||
public String getID() {
|
||||
return ID;
|
||||
}
|
||||
|
||||
public String getFamilyID() {
|
||||
return familyID;
|
||||
}
|
||||
|
||||
public String getPaternalID() {
|
||||
return paternalID;
|
||||
}
|
||||
|
||||
public String getMaternalID() {
|
||||
return maternalID;
|
||||
}
|
||||
|
||||
public Affection getAffection() {
|
||||
return affection;
|
||||
}
|
||||
|
||||
public boolean hasOtherPhenotype() {
|
||||
return affection == Affection.OTHER;
|
||||
}
|
||||
|
||||
public String getOtherPhenotype() {
|
||||
return otherPhenotype;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's mother
|
||||
* @return sample object with relationship mother, if exists, or null
|
||||
*/
|
||||
public Sample getMother() {
|
||||
return infoDB.getSample(maternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the sample's father
|
||||
* @return sample object with relationship father, if exists, or null
|
||||
*/
|
||||
public Sample getFather() {
|
||||
return infoDB.getSample(paternalID);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get gender of the sample
|
||||
* @return property of key "gender" - must be of type Gender
|
||||
*/
|
||||
public Gender getGender() {
|
||||
return gender;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(final Sample sample) {
|
||||
return ID.compareTo(sample.getID());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
|
||||
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
|
||||
getOtherPhenotype(), properties);
|
||||
}
|
||||
|
||||
// // -------------------------------------------------------------------------------------
|
||||
// //
|
||||
// // code for working with additional -- none standard -- properites
|
||||
// //
|
||||
// // -------------------------------------------------------------------------------------
|
||||
//
|
||||
// public Map<String, Object> getExtraProperties() {
|
||||
// return Collections.unmodifiableMap(properties);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// * Get one property
|
||||
// * @param key key of property
|
||||
// * @return value of property as generic object
|
||||
// */
|
||||
// public Object getExtraPropertyValue(final String key) {
|
||||
// return properties.get(key);
|
||||
// }
|
||||
//
|
||||
// /**
|
||||
// *
|
||||
// * @param key property key
|
||||
// * @return true if sample has this property (even if its value is null)
|
||||
// */
|
||||
// public boolean hasExtraProperty(String key) {
|
||||
// return properties.containsKey(key);
|
||||
// }
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return ID.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(final Object o) {
|
||||
if(o == null)
|
||||
return false;
|
||||
if(o instanceof Sample) {
|
||||
Sample otherSample = (Sample)o;
|
||||
return ID.equals(otherSample.ID) &&
|
||||
equalOrNull(familyID, otherSample.familyID) &&
|
||||
equalOrNull(paternalID, otherSample.paternalID) &&
|
||||
equalOrNull(maternalID, otherSample.maternalID) &&
|
||||
equalOrNull(gender, otherSample.gender) &&
|
||||
equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
|
||||
equalOrNull(affection, otherSample.affection) &&
|
||||
equalOrNull(properties, otherSample.properties);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private final static boolean equalOrNull(final Object o1, final Object o2) {
|
||||
if ( o1 == null )
|
||||
return o2 == null;
|
||||
else
|
||||
return o2 == null ? false : o1.equals(o2);
|
||||
}
|
||||
|
||||
private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
|
||||
if ( o1 == null || o1.equals(emptyValue) ) {
|
||||
// take o2 if both are null, otherwise keep o2
|
||||
return o2 == null ? null : o2;
|
||||
} else {
|
||||
if ( o2 == null || o2.equals(emptyValue) )
|
||||
return o1; // keep o1, since it's a real value
|
||||
else {
|
||||
// both o1 and o2 have a value
|
||||
if ( o1 == o2 )
|
||||
return o1;
|
||||
else
|
||||
throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final static Sample mergeSamples(final Sample prev, final Sample next) {
|
||||
if ( prev.equals(next) )
|
||||
return next;
|
||||
else {
|
||||
return new Sample(prev.getID(), prev.infoDB,
|
||||
mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
|
||||
mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
|
||||
mergeValues(prev.getID(), "Material_ID", prev.getMaternalID(), next.getMaternalID(), null),
|
||||
mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
|
||||
mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
|
||||
mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
|
||||
//mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,183 @@
|
|||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import net.sf.samtools.SAMReadGroupRecord;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SampleDB {
|
||||
/**
|
||||
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
|
||||
* this is stored as a HashMap.
|
||||
*/
|
||||
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
*/
|
||||
public SampleDB() {
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Protected function to add a single sample to the database
|
||||
*
|
||||
* @param sample to be added
|
||||
*/
|
||||
protected SampleDB addSample(Sample sample) {
|
||||
Sample prev = samples.get(sample.getID());
|
||||
if ( prev != null )
|
||||
sample = Sample.mergeSamples(prev, sample);
|
||||
samples.put(sample.getID(), sample);
|
||||
return this;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for getting a sample from the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get a sample by its ID
|
||||
* If an alias is passed in, return the main sample object
|
||||
* @param id
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(String id) {
|
||||
return samples.get(id);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param read
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final SAMRecord read) {
|
||||
return getSample(read.getReadGroup());
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param rg
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final SAMReadGroupRecord rg) {
|
||||
return getSample(rg.getSample());
|
||||
}
|
||||
|
||||
/**
|
||||
* @param g Genotype
|
||||
* @return sample Object with this ID, or null if this does not exist
|
||||
*/
|
||||
public Sample getSample(final Genotype g) {
|
||||
return getSample(g.getSampleName());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions for accessing samples in the DB
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Get number of sample objects
|
||||
* @return size of samples map
|
||||
*/
|
||||
public int sampleCount() {
|
||||
return samples.size();
|
||||
}
|
||||
|
||||
public Set<Sample> getSamples() {
|
||||
return new HashSet<Sample>(samples.values());
|
||||
}
|
||||
|
||||
public Collection<String> getSampleNames() {
|
||||
return Collections.unmodifiableCollection(samples.keySet());
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Takes a collection of sample names and returns their corresponding sample objects
|
||||
* Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
|
||||
* @param sampleNameList Set of sample names
|
||||
* @return Corresponding set of samples
|
||||
*/
|
||||
public Set<Sample> getSamples(Collection<String> sampleNameList) {
|
||||
HashSet<Sample> samples = new HashSet<Sample>();
|
||||
for (String name : sampleNameList) {
|
||||
try {
|
||||
samples.add(getSample(name));
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new StingException("Could not get sample with the following ID: " + name, e);
|
||||
}
|
||||
}
|
||||
return samples;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Higher level pedigree functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Returns a sorted set of the family IDs in all samples (excluding null ids)
|
||||
* @return
|
||||
*/
|
||||
public final Set<String> getFamilyIDs() {
|
||||
return getFamilies().keySet();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a map from family ID -> set of family members for all samples with
|
||||
* non-null family ids
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public final Map<String, Set<Sample>> getFamilies() {
|
||||
final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
|
||||
|
||||
for ( final Sample sample : samples.values() ) {
|
||||
final String famID = sample.getFamilyID();
|
||||
if ( famID != null ) {
|
||||
if ( ! families.containsKey(famID) )
|
||||
families.put(famID, new TreeSet<Sample>());
|
||||
families.get(famID).add(sample);
|
||||
}
|
||||
}
|
||||
|
||||
return families;
|
||||
}
|
||||
|
||||
/**
|
||||
* Return all samples with a given family ID
|
||||
* @param familyId
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getFamily(String familyId) {
|
||||
return getFamilies().get(familyId);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns all children of a given sample
|
||||
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
|
||||
* @param sample
|
||||
* @return
|
||||
*/
|
||||
public Set<Sample> getChildren(Sample sample) {
|
||||
final HashSet<Sample> children = new HashSet<Sample>();
|
||||
for ( final Sample familyMember : getFamily(sample.getFamilyID())) {
|
||||
if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
|
||||
children.add(familyMember);
|
||||
}
|
||||
}
|
||||
return children;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,153 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.samples;
|
||||
|
||||
import net.sf.samtools.SAMFileHeader;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SampleDBBuilder {
|
||||
PedigreeValidationType validationStrictness;
|
||||
final SampleDB sampleDB = new SampleDB();
|
||||
final GenomeAnalysisEngine engine;
|
||||
|
||||
Set<Sample> samplesFromDataSources = new HashSet<Sample>();
|
||||
Set<Sample> samplesFromPedigrees = new HashSet<Sample>();
|
||||
|
||||
/** for testing only */
|
||||
protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
|
||||
engine = null;
|
||||
this.validationStrictness = validationStrictness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Constructor takes both a SAM header and sample files because the two must be integrated.
|
||||
*/
|
||||
public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
|
||||
this.engine = engine;
|
||||
this.validationStrictness = validationStrictness;
|
||||
}
|
||||
|
||||
/**
|
||||
* Hallucinates sample objects for all the samples in the SAM file and stores them
|
||||
*/
|
||||
public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
|
||||
addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
|
||||
for (final String sampleName : sampleNames) {
|
||||
if (sampleDB.getSample(sampleName) == null) {
|
||||
final Sample newSample = new Sample(sampleName, sampleDB);
|
||||
sampleDB.addSample(newSample);
|
||||
samplesFromDataSources.add(newSample); // keep track of data source samples
|
||||
}
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
|
||||
for (final File pedFile : pedigreeFiles) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
|
||||
for (final String pedString : pedigreeStrings) {
|
||||
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
|
||||
samplesFromPedigrees.addAll(samples);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse one sample file and integrate it with samples that are already there
|
||||
* Fail quickly if we find any errors in the file
|
||||
*/
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
|
||||
final PedReader reader = new PedReader();
|
||||
|
||||
try {
|
||||
return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotReadInputFile(sampleFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
|
||||
final PedReader reader = new PedReader();
|
||||
return reader.parse(string, getMissingFields(string), sampleDB);
|
||||
}
|
||||
|
||||
public SampleDB getFinalSampleDB() {
|
||||
validate();
|
||||
return sampleDB;
|
||||
}
|
||||
|
||||
public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
|
||||
if ( engine == null )
|
||||
return EnumSet.noneOf(PedReader.MissingPedField.class);
|
||||
else {
|
||||
final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
|
||||
return PedReader.parseMissingFieldTags(engineArg, posTags);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Validation
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
protected final void validate() {
|
||||
if ( validationStrictness == PedigreeValidationType.SILENT )
|
||||
return;
|
||||
else {
|
||||
// check that samples in data sources are all annotated, if anything is annotated
|
||||
if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
|
||||
final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
|
||||
for ( final Sample pSample : samplesFromPedigrees )
|
||||
sampleNamesFromPedigrees.add(pSample.getID());
|
||||
|
||||
for ( final Sample dsSample : samplesFromDataSources )
|
||||
if ( ! sampleNamesFromPedigrees.contains(dsSample.getID()) )
|
||||
throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -364,8 +364,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
|
||||
// count up the number of skipped reads by summing over all filters
|
||||
long nSkippedReads = 0L;
|
||||
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet())
|
||||
nSkippedReads += countsByFilter.getValue();
|
||||
for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
|
||||
nSkippedReads += countsByFilter;
|
||||
|
||||
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600));
|
||||
if ( cumulativeMetrics.getNumReadsSeen() > 0 )
|
||||
|
|
@ -373,10 +373,10 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
|
|||
nSkippedReads,
|
||||
cumulativeMetrics.getNumReadsSeen(),
|
||||
100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen())));
|
||||
for ( Map.Entry<Class, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
|
||||
for ( Map.Entry<String, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
|
||||
long count = filterCounts.getValue();
|
||||
logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s",
|
||||
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey())));
|
||||
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), filterCounts.getKey()));
|
||||
}
|
||||
|
||||
if ( performanceLog != null ) performanceLog.close();
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
|
|||
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
|
|
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
return "dups";
|
||||
}
|
||||
|
||||
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
|
||||
private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
|
||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
|
||||
ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
|
||||
|
||||
l.add(read);
|
||||
for (SAMRecord read2 : iter) {
|
||||
|
|
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
iter.pushback(read2);
|
||||
break;
|
||||
} else {
|
||||
l.add(read2);
|
||||
l.add((GATKSAMRecord) read2);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
* @param reads the list of reads to split into unique molecular samples
|
||||
* @return
|
||||
*/
|
||||
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) {
|
||||
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>();
|
||||
protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
|
||||
Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
|
||||
|
||||
// for each read, find duplicates, and either add the read to its duplicate list or start a new one
|
||||
for ( SAMRecord read : reads ) {
|
||||
List<SAMRecord> readSet = findDuplicateReads(read, readSets);
|
||||
for ( GATKSAMRecord read : reads ) {
|
||||
List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
|
||||
|
||||
if ( readSet == null ) {
|
||||
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list
|
||||
readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
|
||||
} else {
|
||||
readSet.add(read);
|
||||
}
|
||||
|
|
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
* @param readSets
|
||||
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
|
||||
*/
|
||||
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
|
||||
protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
|
||||
if ( read.getReadPairedFlag() ) {
|
||||
// paired
|
||||
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
|
||||
|
||||
for (List<SAMRecord> reads : readSets) {
|
||||
SAMRecord key = reads.get(0);
|
||||
for (List<GATKSAMRecord> reads : readSets) {
|
||||
GATKSAMRecord key = reads.get(0);
|
||||
|
||||
// read and key start at the same place, and either the this read and the key
|
||||
// share a mate location or the read is flagged as a duplicate
|
||||
|
|
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
}
|
||||
}
|
||||
} else {
|
||||
for (List<SAMRecord> reads : readSets) {
|
||||
SAMRecord key = reads.get(0);
|
||||
for (List<GATKSAMRecord> reads : readSets) {
|
||||
GATKSAMRecord key = reads.get(0);
|
||||
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
|
||||
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
|
||||
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
|
||||
|
|
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
|
|||
// get the genome loc from the read
|
||||
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
|
||||
|
||||
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
|
||||
Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
|
||||
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
|
||||
|
||||
// Jump forward in the reference to this locus location
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
import org.broadinstitute.sting.gatk.walkers.DataSource;
|
||||
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/*
|
||||
* Copyright (c) 2009 The Broad Institute
|
||||
|
|
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
|
|||
// if the read is mapped, create a metadata tracker
|
||||
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
|
||||
|
||||
final boolean keepMeP = walker.filter(refContext, read);
|
||||
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
|
||||
if (keepMeP) {
|
||||
M x = walker.map(refContext, read, tracker); // the tracker can be null
|
||||
M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
|
||||
sum = walker.reduce(x, sum);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import net.sf.picard.reference.ReferenceSequence;
|
||||
import net.sf.picard.reference.ReferenceSequenceFile;
|
||||
import net.sf.picard.reference.ReferenceSequenceFileFactory;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import net.sf.samtools.util.StringUtil;
|
||||
import org.broadinstitute.sting.commandline.Advanced;
|
||||
import org.broadinstitute.sting.commandline.Argument;
|
||||
|
|
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
|
|||
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
|
||||
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @param read the read itself, as a GATKSAMRecord
|
||||
* @return the ReadClipper object describing what should be done to clip this read
|
||||
*/
|
||||
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
|
||||
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
|
||||
read = ReadUtils.replaceSoftClipsWithMatches(read);
|
||||
|
|
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
*/
|
||||
private void clipSequences(ReadClipperWithData clipper) {
|
||||
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (SeqToClip stc : sequencesToClip) {
|
||||
|
|
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
* @param stop
|
||||
* @return
|
||||
*/
|
||||
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) {
|
||||
private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
|
||||
if (read.getReadNegativeStrandFlag())
|
||||
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
|
||||
else
|
||||
|
|
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
*/
|
||||
private void clipCycles(ReadClipperWithData clipper) {
|
||||
if (cyclesToClip != null) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
|
||||
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
|
||||
|
|
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
* @param clipper
|
||||
*/
|
||||
private void clipBadQualityScores(ReadClipperWithData clipper) {
|
||||
SAMRecord read = clipper.getRead();
|
||||
GATKSAMRecord read = clipper.getRead();
|
||||
ClippingData data = clipper.getData();
|
||||
int readLen = read.getReadBases().length;
|
||||
byte[] quals = read.getBaseQualities();
|
||||
|
|
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
if ( clipper == null )
|
||||
return data;
|
||||
|
||||
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
|
||||
GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
|
||||
if (outputBam != null) {
|
||||
outputBam.addAlignment(clippedRead);
|
||||
} else {
|
||||
|
|
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
|
|||
public class ReadClipperWithData extends ReadClipper {
|
||||
private ClippingData data;
|
||||
|
||||
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
|
||||
super(read);
|
||||
data = new ClippingData(clipSeqs);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,10 +1,10 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
|
||||
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
|
@ -20,11 +20,11 @@ import java.util.Set;
|
|||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
|
||||
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
|
||||
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
|
||||
return true; // We are keeping all the reads
|
||||
}
|
||||
|
||||
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets );
|
||||
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
|
||||
|
||||
// Given result of map function
|
||||
public abstract ReduceType reduceInit();
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
|||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.text.XReadLines;
|
||||
|
||||
import java.io.File;
|
||||
|
|
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
|
|||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
return namesToKeep.contains(read.getReadName());
|
||||
}
|
||||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,9 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.commandline.Output;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
import java.io.PrintStream;
|
||||
import java.text.DecimalFormat;
|
||||
|
|
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
|
|||
|
||||
private FlagStat myStat = new FlagStat();
|
||||
|
||||
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
myStat.readCount++;
|
||||
if (read.getReadFailsVendorQualityCheckFlag()) {
|
||||
myStat.QC_failure++;
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
|||
*/
|
||||
@By(DataSource.READS)
|
||||
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.INTERVAL)
|
||||
@PartitionBy(PartitionType.LOCUS)
|
||||
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
|
||||
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
// Do we actually want to operate on the context?
|
||||
|
|
|
|||
|
|
@ -34,6 +34,12 @@ public enum PartitionType {
|
|||
*/
|
||||
NONE,
|
||||
|
||||
/**
|
||||
* The walker inputs can be chunked down to individual
|
||||
* reads.
|
||||
*/
|
||||
READ,
|
||||
|
||||
/**
|
||||
* The walker inputs can be chunked down to the
|
||||
* per-locus level.
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ import java.util.TreeSet;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
|
||||
|
|
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/**
|
||||
* The reads filter function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return true if the read passes the filter, false if it doesn't
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
// check the read group
|
||||
if ( readGroup != null ) {
|
||||
SAMReadGroupRecord myReadGroup = read.getReadGroup();
|
||||
|
|
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
|
|||
|
||||
/**
|
||||
* The reads map function.
|
||||
*
|
||||
* @param ref the reference bases that correspond to our read, if a reference was provided
|
||||
* @param read the read itself, as a SAMRecord
|
||||
* @return the read itself
|
||||
*/
|
||||
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
|
|
@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
|
||||
@PartitionBy(PartitionType.CONTIG)
|
||||
@PartitionBy(PartitionType.READ)
|
||||
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
|
||||
public boolean requiresOrderedReads() { return false; }
|
||||
|
||||
|
|
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
|
|||
/** Must return true for reads that need to be processed. Reads, for which this method return false will
|
||||
* be skipped by the engine and never passed to the walker.
|
||||
*/
|
||||
public boolean filter(ReferenceContext ref, SAMRecord read) {
|
||||
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
|
||||
// We are keeping all the reads
|
||||
return true;
|
||||
}
|
||||
|
||||
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
|
||||
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker);
|
||||
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
|
|||
import org.broadinstitute.sting.commandline.Argument;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
||||
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
|
||||
import org.broadinstitute.sting.utils.sam.ReadUtils;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
|
|||
logger.info("SplitSamFile version: " + VERSION);
|
||||
}
|
||||
|
||||
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
|
||||
return read;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,15 +25,17 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.CommandLineGATK;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
|
||||
import org.broadinstitute.sting.gatk.samples.Sample;
|
||||
import org.broadinstitute.sting.gatk.samples.SampleDB;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.collections.Pair;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
|
|
@ -77,6 +79,23 @@ public abstract class Walker<MapType, ReduceType> {
|
|||
return toolkit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the master sequence dictionary for this walker
|
||||
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
|
||||
* @return
|
||||
*/
|
||||
protected SAMSequenceDictionary getMasterSequenceDictionary() {
|
||||
return getToolkit().getMasterSequenceDictionary();
|
||||
}
|
||||
|
||||
protected SampleDB getSampleDB() {
|
||||
return getToolkit().getSampleDB();
|
||||
}
|
||||
|
||||
protected Sample getSample(final String id) {
|
||||
return getToolkit().getSampleDB().getSample(id);
|
||||
}
|
||||
|
||||
/**
|
||||
* (conceptual static) method that states whether you want to see reads piling up at a locus
|
||||
* that contain a deletion at the locus.
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
|
|||
continue;
|
||||
}
|
||||
// todo -- actually care about indel length from the pileup (agnostic at the moment)
|
||||
int refCount = indelPileup.size();
|
||||
int refCount = indelPileup.getNumberOfElements();
|
||||
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
|
||||
|
||||
if ( refCount + altCount == 0 ) {
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
|
|||
if (!g.isHet())
|
||||
return null;
|
||||
|
||||
Set<Allele> altAlleles = vc.getAlternateAlleles();
|
||||
Collection<Allele> altAlleles = vc.getAlternateAlleles();
|
||||
if ( altAlleles.size() == 0 )
|
||||
return null;
|
||||
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue