Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

2011-09-27 11:07:01 -04:00 · 2011-09-27 11:07:01 -04:00 · e5ce5e265a
parent 7fefc224ba 26e71f6688
commit e5ce5e265a
203 changed files with 5112 additions and 4149 deletions
--- a/build.xml
+++ b/build.xml
@ -163,6 +163,14 @@
      <!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
      <delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
      <delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
      <!--
         An old version of ivy-1.4.1.xml does not contain /ivy-module/configuration/conf/@name="compile".
         Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
         Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
        -->
      <delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
      <delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
      <delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
    </target>
    <target name="init.buildall">
@ -709,53 +717,6 @@
        </antcall>
    </target>
    <target name="test.init.compile">
        <mkdir dir="${java.test.classes}"/>
        <mkdir dir="${scala.test.classes}"/>
        <antcall target="resolve">
          <param name="ivy.conf" value="test"/>
        </antcall>            
    </target>
    <target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
        <echo message="Sting: Compiling test cases!"/>
        <javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
            <src path="${java.public.test.sources}"/>
            <src path="${java.private.test.sources}"/>
            <classpath>
                <path refid="external.dependencies" />
                <pathelement location="${java.classes}"/>
                <pathelement location="${java.contracts}"/>
                <pathelement location="${lib.dir}/testng-5.14.1.jar"/>
            </classpath>
            <compilerarg value="-proc:none"/>
 <!-- 
            <compilerarg value="-Acom.google.java.contract.debug"/>
            <compilerarg value="-Acom.google.java.contract.dump=dump/"/>
 -->
        </javac>
    </target>
    <target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
        <echo message="Scala: Compiling test cases!"/>
        <antcall target="resolve">
          <param name="ivy.conf" value="test"/>
        </antcall>          
        <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
 	        <src path="${scala.public.test.sources}" />
 	        <src path="${scala.private.test.sources}" />
            <include name="**/*.scala"/>
            <classpath>
                <path refid="scala.dependencies"/>
                <pathelement location="${scala.test.classes}"/>
                <pathelement location="${java.test.classes}"/>
                <pathelement location="${lib.dir}/testng-5.14.1.jar"/>
            </classpath>
        </scalac>
    </target>
    <target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
    <!-- new scala target -->
    <target name="scala" description="build the scala directory">
@ -769,20 +730,113 @@
    <!-- ***************************************************************************** -->
    <!-- where to put reports and tests-->
    <property name="report" value="${build.dir}/report"/>
    <property name="java.test.classes" value="${build.dir}/java/testclasses"/>
    <property name="test.output" value="${dist.dir}/test"/>
-    <property name="java.public.test.sources" value="public/java/test"/>
+    <property name="java.test.classes" value="${build.dir}/java/testclasses"/>
-    <property name="java.private.test.sources" value="private/java/test"/>
+    <property name="java.public.test.classes" value="${java.test.classes}/public"/>
    <property name="java.private.test.classes" value="${java.test.classes}/private"/>
    <property name="java.public.test.sources" value="${public.dir}/java/test"/>
    <property name="java.private.test.sources" value="${private.dir}/java/test"/>
    <property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
-    <property name="scala.public.test.sources" value="public/scala/test"/>
+    <property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
-    <property name="scala.private.test.sources" value="private/scala/test"/>
+    <property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
    <property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
    <property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
    <property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
    <!-- provide a ceiling on the memory that unit/integration tests can consume. -->
    <property name="test.maxmemory" value="4g"/>
    <!-- Prepares for test compilation: creates the Java and Scala test class output
         directories, then invokes the "resolve" target with ivy.conf set to "test". -->
    <target name="test.init.compile">
        <mkdir dir="${java.test.classes}"/>
        <mkdir dir="${scala.test.classes}"/>
        <antcall target="resolve">
          <param name="ivy.conf" value="test"/>
        </antcall>
    </target>
   <!-- Compiles the public Java test sources (${java.public.test.sources}) into
        ${java.public.test.classes}. Runs after "dist" and "test.init.compile".
        The compile classpath is the external dependencies, the main Java classes,
        the contracts output and the TestNG jar. -->
   <target name="test.java.public.compile" depends="dist,test.init.compile">
        <mkdir dir="${java.public.test.classes}"/>
        <echo message="Sting: Compiling public test cases!"/>
        <javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
            <src path="${java.public.test.sources}"/>
            <classpath>
                <path refid="external.dependencies" />
                <pathelement location="${java.classes}"/>
                <pathelement location="${java.contracts}"/>
                <pathelement location="${testng.jar}"/>
            </classpath>
            <!-- javac option: compile without running annotation processors. -->
            <compilerarg value="-proc:none"/>
            <!-- The two compiler arguments below are intentionally left commented out. -->
 <!--
            <compilerarg value="-Acom.google.java.contract.debug"/>
            <compilerarg value="-Acom.google.java.contract.dump=dump/"/>
 -->
        </javac>
    </target>
    <!-- Compiles the private Java test sources (${java.private.test.sources}) into
         ${java.private.test.classes}. Executes only when the include.private property is set.
         Depends on the public test compile, whose output directory is placed on the
         classpath here so private tests can reference public test classes. -->
    <target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
        <mkdir dir="${java.private.test.classes}"/>
        <echo message="Sting: Compiling private test cases!"/>
        <javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
            <src path="${java.private.test.sources}"/>
            <classpath>
                <path refid="external.dependencies" />
                <pathelement location="${java.public.test.classes}"/>
                <pathelement location="${java.classes}"/>
                <pathelement location="${java.contracts}"/>
                <pathelement location="${testng.jar}"/>
            </classpath>
            <!-- javac option: compile without running annotation processors. -->
            <compilerarg value="-proc:none"/>
            <!-- The two compiler arguments below are intentionally left commented out. -->
 <!--
            <compilerarg value="-Acom.google.java.contract.debug"/>
            <compilerarg value="-Acom.google.java.contract.dump=dump/"/>
 -->
        </javac>
    </target>
    <!-- Aggregate target: runs the public and then the private Java test compile targets. -->
    <target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
    <!-- Compiles the public Scala test sources (${scala.public.test.sources}) into
         ${scala.public.test.classes}. Executes only when the scala.include property is set.
         Runs after the Java test compile and "scala.compile"; the public Java test classes
         and the TestNG jar are added to the Scala dependency classpath. -->
    <target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
        <mkdir dir="${scala.public.test.classes}"/>
        <echo message="Scala: Compiling public test cases!"/>
        <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
 	        <src path="${scala.public.test.sources}" />
            <classpath>
                <path refid="scala.dependencies"/>
                <pathelement location="${java.public.test.classes}"/>
                <pathelement location="${testng.jar}"/>
            </classpath>
        </scalac>
    </target>
    <!-- Compiles the private Scala test sources (${scala.private.test.sources}) into
         ${scala.private.test.classes}. Executes only when the include.scala.private property is set.
         Depends on the public Scala test compile; the public Scala test classes and both
         Java test class directories are on the classpath. -->
    <target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
        <mkdir dir="${scala.private.test.classes}"/>
        <echo message="Scala: Compiling private test cases!"/>
        <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
 	        <src path="${scala.private.test.sources}" />
            <classpath>
                <path refid="scala.dependencies"/>
                <pathelement location="${scala.public.test.classes}"/>
                <pathelement location="${java.public.test.classes}"/>
                <pathelement location="${java.private.test.classes}"/>
                <pathelement location="${testng.jar}"/>
            </classpath>
        </scalac>
    </target>
    <!-- Aggregate target: runs the public and then the private Scala test compile targets. -->
    <target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
    <!-- Aggregate target: runs init.usecontracts, then the Java and Scala test compile targets. -->
    <target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
    <!-- TEST -->
    <macrodef name="run-test">
        <attribute name="testtype"/>
        <attribute name="outputdir"/>
        <attribute name="runfailed"/>
        <sequential>
            <condition property="run.failed.tests">
                <equals arg1="@{runfailed}" arg2="true"/>
            </condition>
            <!-- Get the pipeline run type.  Default to dry.  -->
            <condition property="pipeline.run" value="dry" else="${pipeline.run}">
                <equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
@ -792,10 +846,10 @@
                <isset property="include.contracts" />
            </condition>
-            <mkdir dir="${report}/@{testtype}"/>
+            <mkdir dir="@{outputdir}"/>
            <echo message="Sting: Running @{testtype} test cases!"/>
-            <taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
+            <taskdef resource="testngtasks" classpath="${testng.jar}"/>
-            <testng outputDir="${report}/@{testtype}"
+            <testng outputDir="@{outputdir}"
                    haltOnFailure="false" failureProperty="test.failure"
                    verbose="2"
                    workingDir="${basedir}"
@ -813,117 +867,108 @@
                    <pathelement location="${java.classes}" />
                    <pathelement location="${scala.classes}" />
                    <pathelement location="${java.contracts}" />
-                    <pathelement location="${java.test.classes}" />
+                    <pathelement location="${java.public.test.classes}" />
-                    <pathelement location="${scala.test.classes}" />
+                    <pathelement location="${java.private.test.classes}" />
                    <pathelement location="${scala.public.test.classes}" />
                    <pathelement location="${scala.private.test.classes}" />
                </classpath>
-                <classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
+                <classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
-                <classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
+                <classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
                    <include name="**/@{testtype}.class" if="include.private"/>
                </classfileset>
                <classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
                    <include name="**/@{testtype}*.class" if="scala.include"/>
                </classfileset>
                <classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
                    <include name="**/@{testtype}*.class" if="include.scala.private"/>
                </classfileset>
                <xmlfileset dir="${basedir}">
                    <include name="@{testtype}" if="run.failed.tests"/>
                </xmlfileset>
            </testng>
            <!-- generate a report for Bamboo or Hudson to read in -->
-            <junitreport todir="${report}/@{testtype}">
+            <junitreport todir="@{outputdir}">
-                <fileset dir="${report}/@{testtype}">
+                <fileset dir="@{outputdir}">
                    <include name="*/*.xml"/>
                </fileset>
-                <report format="noframes" todir="${report}/@{testtype}"/>
+                <report format="noframes" todir="@{outputdir}"/>
            </junitreport>
            <fail message="test failed" if="test.failure" />
        </sequential>
    </macrodef>
    <!-- FAILED-TEST -->
    <!-- Re-runs the tests listed in a TestNG failed-suite XML file.
         Attribute xmlfailedtestfile: include pattern, resolved against ${basedir}, selecting the suite file(s).
         Output is written to ${report}/failed_rerun; the build fails at the end if the
         test.failure property was set by the testng task. -->
    <macrodef name="run-failed-test">
        <attribute name="xmlfailedtestfile" />
        <sequential>
            <!-- Get the pipeline run type.  Default to dry.  -->
            <condition property="pipeline.run" value="dry" else="${pipeline.run}">
                <equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
            </condition>
            <!-- Attach the cofoja java agent only when include.contracts is set; otherwise pass no extra JVM args. -->
            <condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
                <isset property="include.contracts" />
            </condition>
            <mkdir dir="${report}/failed_rerun" />
            <echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
            <!-- Use the shared testng.jar property rather than repeating the versioned jar path. -->
            <taskdef resource="testngtasks" classpath="${testng.jar}"/>
            <testng outputDir="${report}/failed_rerun"
                    haltOnFailure="false" failureProperty="test.failure"
                    verbose="2"
                    workingDir="${basedir}"
                    useDefaultListeners="false"
                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
                <jvmarg value="-Xmx${test.maxmemory}" />
                <jvmarg value="-Djava.awt.headless=true" />
                <jvmarg value="-Dpipeline.run=${pipeline.run}" />
                <jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
                <jvmarg line="${cofoja.jvm.args}"/>
 <!--                 <jvmarg value="-Xdebug"/> -->
 <!--                 <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
                <classpath>
                    <path refid="external.dependencies" />
                    <pathelement location="${java.classes}" />
                    <pathelement location="${scala.classes}" />
                    <pathelement location="${java.contracts}" />
                    <!-- Test classes are compiled into public/private subdirectories, so each
                         subdirectory must be its own classpath root (same list as the run-test macro). -->
                    <pathelement location="${java.public.test.classes}" />
                    <pathelement location="${java.private.test.classes}" />
                    <pathelement location="${scala.public.test.classes}" />
                    <pathelement location="${scala.private.test.classes}" />
                </classpath>
                <xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
            </testng>
            <fail message="test failed" if="test.failure" />
        </sequential>
    </macrodef>
-    <!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
+    <target name="alltests">
-    <target name="test" depends="test.compile" description="Run unit tests">
+        <antcall target="test" inheritAll="false"/>
        <antcall target="integrationtest" inheritAll="false"/>
        <antcall target="pipelinetest" inheritAll="false"/>
    </target>
    <!-- Runs the public variants of the unit, integration and pipeline test targets in turn.
         Each is started through a separate antcall with inheritAll="false". -->
    <target name="alltests.public">
        <antcall target="test.public" inheritAll="false"/>
        <antcall target="integrationtest.public" inheritAll="false"/>
        <antcall target="pipelinetest.public" inheritAll="false"/>
    </target>
    <!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
    <target name="test" depends="init.buildall,test.compile" description="Run unit tests">
        <condition property="ttype" value="*UnitTest" else="${single}">
            <not><isset property="single"/></not>
        </condition>
-        <run-test testtype="${ttype}"/>
+        <run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
    </target>
-    <target name="integrationtest" depends="test.compile" description="Run integration tests">
+    <target name="test.public" depends="init.buildpublic,test"/>
    <target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
        <condition property="itype" value="*IntegrationTest" else="${single}">
            <not><isset property="single"/></not>
        </condition>
-        <run-test testtype="${itype}"/>
+        <run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
    </target>
-    <target name="performancetest" depends="test.compile" description="Run performance tests">
+    <target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
    <target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
       <condition property="ptype" value="*PerformanceTest" else="${single}">
            <not><isset property="single"/></not>
        </condition>
-        <run-test testtype="${ptype}"/>
+        <run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
    </target>
-    <target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
+    <target name="performancetest.public" depends="init.buildpublic,performancetest" />
    <target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
        <condition property="pipetype" value="*PipelineTest" else="${single}">
            <not><isset property="single"/></not>
        </condition>
-        <run-test testtype="${pipetype}"/>
+        <run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
    </target>
-    <target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
+    <target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
    <target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
        <property name="pipeline.run" value="run"/>
        <condition property="pipetype" value="*PipelineTest" else="${single}">
            <not><isset property="single"/></not>
        </condition>
-        <run-test testtype="${pipetype}"/>
+        <run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
    </target>
    <target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
    <target name="failed-test" depends="init.buildall,test.compile">
        <run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
    </target>
-    <target name="failed-test" depends="test.compile">
+    <target name="failed-integration" depends="init.buildall,test.compile">
-        <run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
+        <run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
    </target>
-    <target name="failed-integration" depends="test.compile">
+    <target name="failed-performance" depends="init.buildall,test.compile">
-        <run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
+        <run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
    </target>
-    <target name="failed-performance" depends="test.compile">
+    <target name="failed-pipeline" depends="init.buildall,test.compile">
-        <run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
+        <run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
    </target>
    <target name="failed-pipeline" depends="test.compile">
        <run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
    </target>
    <!-- ******************************************************************************** -->
--- a/ivy.xml
+++ b/ivy.xml
@ -15,10 +15,8 @@
    <!-- Tribble -->
    <dependency org="org.broad" name="tribble" rev="latest.integration"/>
-    <dependency org="log4j" name="log4j" rev="1.2.15">
+    <dependency org="log4j" name="log4j" rev="1.2.15"/>
-      <!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
+    <dependency org="javax.mail" name="mail" rev="1.4.4"/>
      <exclude org="javax.mail" />
    </dependency>
    <dependency org="colt" name="colt" rev="1.2.0"/>
    <dependency org="jboss" name="javassist" rev="3.7.ga"/>
    <dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>
--- a/public/R/queueJobReport.R
+++ b/public/R/queueJobReport.R
@ -12,14 +12,14 @@ if ( onCMDLine ) {
  inputFileName = args[1]
  outputPDF = args[2]
 } else {
-  #inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
+  inputFileName = "~/Desktop/Q-30033@gsa1.jobreport.txt"
-  inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
+  #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
  #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
  outputPDF = NA
 }
-RUNTIME_UNITS = "(sec)"
+RUNTIME_UNITS = "(hours)"
-ORIGINAL_UNITS_TO_SECONDS = 1/1000
+ORIGINAL_UNITS_TO_SECONDS = 1/1000/60/60
 # 
 # Helper function to aggregate all of the jobs in the report across all tables
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
 #
 # Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
 #
-plotJobsGantt <- function(gatkReport, sortOverall) {
+plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
  allJobs = allJobsFromReport(gatkReport)
  if ( sortOverall ) {
    title = "All jobs, by analysis, by start time"
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
  }
  allJobs$index = 1:nrow(allJobs)
  minTime = min(allJobs$startTime)
-  allJobs$relStartTime = allJobs$startTime - minTime
+  allJobs$relStartTime = (allJobs$startTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
-  allJobs$relDoneTime = allJobs$doneTime - minTime
+  allJobs$relDoneTime = (allJobs$doneTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
  allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
  maxRelTime = max(allJobs$relDoneTime)
  p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
-  p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
+  p <- p + theme_bw()
-  p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
+  p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
  if ( includeText )
    p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
  p <- p + xlim(0, maxRelTime * 1.1)
  p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
-  p <- p + ylab("Job")
+  p <- p + ylab("Job number")
  p <- p + opts(title=title)
  print(p)
 }
@ -140,6 +142,8 @@ print(paste("Project          :", inputFileName))
 convertUnits <- function(gatkReportData) {
  convertGroup <- function(g) {
    g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
    g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
    g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
    g
  }
  lapply(gatkReportData, convertGroup)
@ -155,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
  pdf(outputPDF, height=8.5, width=11)
 } 
-plotJobsGantt(gatkReportData, T)
+plotJobsGantt(gatkReportData, T, F)
-plotJobsGantt(gatkReportData, F)
+plotJobsGantt(gatkReportData, F, F)
 plotProgressByTime(gatkReportData)
 for ( group in gatkReportData ) {
 plotGroup(group)
--- a/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
+++ b/public/java/src/org/broadinstitute/sting/analyzecovariates/AnalyzeCovariates.java
@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
    private String RECAL_FILE = "output.recal_data.csv";
    @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
    private String OUTPUT_DIR = "analyzeCovariates/";
-    @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
+    @Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
    private String PATH_TO_RSCRIPT = "Rscript";
    @Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
    private String PATH_TO_RESOURCES = "public/R/";
--- a/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
+++ b/public/java/src/org/broadinstitute/sting/commandline/ArgumentTypeDescriptor.java
@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
                    }
                    if ( tribbleType == null )
-                        if ( ! file.canRead() | !! file.isFile() ) {
+                        if ( ! file.canRead() | ! file.isFile() ) {
                            throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
                        } else {
                            throw new UserException.CommandLineException(
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@ -929,6 +929,14 @@ public class GenomeAnalysisEngine {
        return readsDataSource.getHeader(reader);
    }
    /**
     * Looks up the master sequence dictionary for this GATK engine instance by asking the
     * reference held by the engine's reference data source.
     *
     * @return a never-null dictionary listing all of the contigs known to this engine instance
     */
    public SAMSequenceDictionary getMasterSequenceDictionary() {
        final SAMSequenceDictionary masterDictionary =
                getReferenceDataSource().getReference().getSequenceDictionary();
        return masterDictionary;
    }
    /**
     * Returns data source object encapsulating all essential info and handlers used to traverse
     * reads; header merger, individual file readers etc can be accessed through the returned data source object.
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
 import net.sf.picard.util.PeekableIterator;
 import net.sf.samtools.GATKBAMFileSpan;
 import net.sf.samtools.GATKChunk;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
            if(currentLocus == GenomeLoc.UNMAPPED) {
                nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
                for(SAMReaderID id: dataSource.getReaderIDs())
-                    nextFilePointer.addFileSpans(id,new GATKBAMFileSpan());
+                    nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
                currentLocus = null;
                continue;
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
@ -215,6 +215,45 @@ public class GATKBAMIndex {
        return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
    }
    /**
     * Use to get close to the unmapped reads at the end of a BAM file.
     * <p>
     * Walks the index one sequence at a time, skipping over that sequence's bins and chunks,
     * and remembers the final entry of the most recent non-empty linear index.
     * The index file is opened for the duration of the call and is closed again even if
     * reading fails part-way through.
     *
     * @return The file offset of the first record in the last linear bin, or -1
     * if there are no elements in linear bins (i.e. no mapped reads).
     */
    public long getStartOfLastLinearBin() {
        openIndexFile();
        try {
            // The sequence count is read starting at byte offset 4 of the index
            // (presumably just past the index magic number; confirm against the BAM index layout).
            seek(4);
            final int sequenceCount = readInteger();

            // Because no reads may align to the last sequence in the sequence dictionary,
            // grab the last element of the linear index for each sequence, and return
            // the last one from the last sequence that has one.
            long lastLinearIndexPointer = -1;
            for (int sequenceIndex = 0; sequenceIndex < sequenceCount; sequenceIndex++) {
                final int nBins = readInteger();
                for (int binIndex = 0; binIndex < nBins; binIndex++) {
                    // Skip bin #
                    skipBytes(4);
                    final int nChunks = readInteger();
                    // Skip chunks (16 bytes each)
                    skipBytes(16 * nChunks);
                }

                final int nLinearBins = readInteger();
                if (nLinearBins > 0) {
                    // Skip to last element of list of linear bins (8 bytes each)
                    skipBytes(8 * (nLinearBins - 1));
                    lastLinearIndexPointer = readLongs(1)[0];
                }
            }
            return lastLinearIndexPointer;
        } finally {
            // Always release the index file, including when a read/seek/skip above throws.
            closeIndexFile();
        }
    }
    /**
     * Gets the possible number of bins for a given reference sequence.
     * @return How many bins could possibly be used according to this indexing scheme to index a single contig.
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
     */
    public FilePointer next() {
        FilePointer current = wrappedIterator.next();
-        while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
+        while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
            current = current.combine(parser,wrappedIterator.next());
        return current;
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardStrategy.java
@ -134,24 +134,11 @@ public class ReadShardStrategy implements ShardStrategy {
            Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
            while(selectedReaders.size() == 0 && currentFilePointer != null) {
                shardPosition = currentFilePointer.fileSpans;
                for(SAMReaderID id: shardPosition.keySet()) {
-                    // If the region contains location information (in other words, it is not at
+                    SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
-                    // the start of the unmapped region), add the region.
+                    if(!fileSpan.isEmpty())
-                    if(currentFilePointer.isRegionUnmapped) {
+                        selectedReaders.put(id,fileSpan);
                        // If the region is unmapped and no location data exists, add a null as an indicator to
                        // start at the next unmapped region.
                        if(!isIntoUnmappedRegion) {
                            selectedReaders.put(id,null);
                            isIntoUnmappedRegion = true;
                        }
                        else
                            selectedReaders.put(id,position.get(id));
                    }
                    else {
                        SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
                        if(!fileSpan.isEmpty())
                            selectedReaders.put(id,fileSpan);
                    }
                }
                if(selectedReaders.size() > 0) {
--- a/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/examples/GATKDocsExample.java
@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.ArgumentCollection;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
 * @author Your Name
 * @since Date created
 */
@Hidden
 public class GATKDocsExample extends RodWalker<Integer, Integer> {
    /**
     * Put detailed documentation about the argument here.  No need to duplicate the summary information
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
 * @version 0.1
 */
 public class PlatformFilter extends ReadFilter {
-    @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
+    @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
    protected String[] PLFilterNames;
    public boolean filterOut(SAMRecord rec) {
--- a/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/storage/VCFWriterStorage.java
@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
        else if ( stub.getOutputStream() != null ) {
            this.file = null;
            this.stream = stub.getOutputStream();
-            writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
+            writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
        }
        else
            throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
        }
        // The GATK/Tribble can't currently index block-compressed files on the fly.  Disable OTF indexing even if the user explicitly asked for it.
-        return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
+        return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VCFWriterStub.java
@ -25,6 +25,7 @@
 package org.broadinstitute.sting.gatk.io.stubs;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
 import org.broadinstitute.sting.gatk.CommandLineExecutable;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
        return isCompressed;
    }
    /**
     * Gets the master sequence dictionary from the engine associated with this stub.
     *
     * @see GenomeAnalysisEngine#getMasterSequenceDictionary()
     * @return the value returned by the engine's getMasterSequenceDictionary()
     */
    public SAMSequenceDictionary getMasterSequenceDictionary() {
        return engine.getMasterSequenceDictionary();
    }
    /**
     * Should we tell the VCF writer not to write genotypes?
     * @return true if the writer should not write genotypes.
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@ -293,15 +293,16 @@ public class GATKRunReport {
     * That is, postReport() is guaranteed not to fail for any reason.
     */
    private File postReportToLocalDisk(File rootDir) {
        // Resolve the target file before entering the try block so that the catch handler
        // below can still reach it and remove whatever was partially written.
        final String filename = getID() + ".report.xml.gz";
        final File file = new File(rootDir, filename);
        try {
            postReportToFile(file);
            logger.debug("Wrote report to " + file);
            return file;
        } catch ( Exception e ) {
            // we catch everything, and no matter what eat the error
            exceptDuringRunReport("Couldn't read report file", e);
            // best-effort cleanup: the result of delete() is deliberately ignored
            file.delete();
            return null;
        }
    }
@ -312,6 +313,7 @@ public class GATKRunReport {
        File localFile = postReportToLocalDisk(new File("./"));
        logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
        if ( localFile != null ) { // we succeeded in creating the local file
            localFile.deleteOnExit();
            try {
                // stop us from printing the annoying, and meaningless, mime types warning
                Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
@ -336,14 +338,13 @@ public class GATKRunReport {
                //logger.info("Uploading " + localFile + " to AWS bucket");
                S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
                logger.debug("Uploaded to AWS: " + s3Object);
                logger.info("Uploaded run statistics report to AWS S3");
            } catch ( S3ServiceException e ) {
                exceptDuringRunReport("S3 exception occurred", e);
            } catch ( NoSuchAlgorithmException e ) {
                exceptDuringRunReport("Couldn't calculate MD5", e);
            } catch ( IOException e ) {
                exceptDuringRunReport("Couldn't read report file", e);
            } finally {
                localFile.delete();
            }
        }
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/indexer/RMDIndexer.java
@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram {
        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
        // add writing of the sequence dictionary, if supplied
-        builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
+        builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
        // create the output stream, and write the index
        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/IndexDictionaryUtils.java
@ -0,0 +1,106 @@
 /*
 * Copyright (c) 2011, The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.refdata.tracks;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
 import org.apache.log4j.Logger;
 import org.broad.tribble.index.Index;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 /**
 * Utilities for working with sequence dictionaries embedded in Tribble indices.
 *
 * Dictionary entries live in the index property list, one property per contig: the key is
 * {@link #SequenceDictionaryPropertyPredicate} followed by the contig name, and the value is
 * the contig length written as a decimal string.
 */
 public class IndexDictionaryUtils {
    private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);

    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";

    /**
     * Rebuilds a sequence dictionary from the dictionary properties stored in an index.
     *
     * Only properties whose key starts with {@link #SequenceDictionaryPropertyPredicate} are used; the
     * remainder of the key is the contig name and the value is parsed as the contig length.
     *
     * @param index the index whose properties are read
     * @return a newly created SAMSequenceDictionary; it holds no sequences when the index has no dictionary properties
     * @throws NumberFormatException if a dictionary property value cannot be parsed as an integer
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        final int prefixLength = SequenceDictionaryPropertyPredicate.length();
        for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
            final String key = entry.getKey();
            if (key.startsWith(SequenceDictionaryPropertyPredicate))
                dict.addSequence(new SAMSequenceRecord(key.substring(prefixLength), Integer.parseInt(entry.getValue())));
        }
        return dict;
    }

    /**
     * create the sequence dictionary with the contig list; a backup approach.
     * Every contig is added with a sequence length of 0, since only names are taken from the index here.
     *
     * @param index the index file to use
     * @param dict the sequence dictionary to add contigs to
     * @return the same dict instance, unchanged if the index reports no sequence names (null)
     */
    static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        LinkedHashSet<String> seqNames = index.getSequenceNames();
        if (seqNames == null) {
            return dict;
        }
        for (String name : seqNames) {
            SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
            dict.addSequence(seq);
        }
        return dict;
    }

    /**
     * Stores every sequence of the dictionary in the index as a property, using
     * {@link #SequenceDictionaryPropertyPredicate} + sequence name as the key and the sequence length as the value.
     *
     * @param index the index that receives the properties
     * @param dict the dictionary whose sequences are written
     */
    public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
        for ( SAMSequenceRecord seq : dict.getSequences() ) {
            final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
            final String length = String.valueOf(seq.getSequenceLength());
            index.addProperty(contig,length);
        }
    }

    /**
     * Validates a track's sequence dictionary against the reference dictionary.
     *
     * When the track dictionary is null or empty, validation is skipped and an informational message is logged;
     * otherwise the check is delegated to SequenceDictionaryUtils.validateDictionaries.
     *
     * @param trackName name of the track, used in log and validation messages
     * @param trackDict the track's dictionary; may be null
     * @param referenceDict the reference dictionary to compare against
     * @param validationExclusionType passed through unchanged to the dictionary validation
     */
    public static void validateTrackSequenceDictionary(final String trackName,
                                                       final SAMSequenceDictionary trackDict,
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
        if (trackDict == null || trackDict.size() == 0)
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/refdata/tracks/RMDTrackBuilder.java
@ -25,7 +25,6 @@
 package org.broadinstitute.sting.gatk.refdata.tracks;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
 import org.apache.log4j.Logger;
 import org.broad.tribble.FeatureCodec;
 import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
 import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,11 +50,6 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
 import java.io.File;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.util.LinkedHashSet;
 import java.util.Map;
 import java.util.Set;
 import java.util.TreeSet;
 /**
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
    private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
    public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";
    // private sequence dictionary we use to set our tracks with
    private SAMSequenceDictionary dict = null;
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
                try { logger.info(String.format("  Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
                catch (ReviewedStingException e) { }
-                sequenceDictionary = getSequenceDictionaryFromProperties(index);
+                sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
                // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
                if (sequenceDictionary.size() == 0 && dict != null) {
                    File indexFile = Tribble.indexFile(inputFile);
-                    setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
+                    validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
-                    sequenceDictionary = getSequenceDictionaryFromProperties(index);
+                    try { // re-write the index
                        writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
                    } catch (IOException e) {
                        logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
                    }
                    sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
                }
                if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
        // this can take a while, let them know what we're doing
        logger.info("Creating Tribble index in memory for file " + inputFile);
        Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
-        setIndexSequenceDictionary(inputFile, idx, dict, null, false);
+        validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
        return idx;
    }
    // ---------------------------------------------------------------------------------------------------------
    // static functions to work with the sequence dictionaries of indexes
    // ---------------------------------------------------------------------------------------------------------
    /**
     * Builds a sequence dictionary out of the dictionary properties stored in the index.
     * Properties whose key does not start with SequenceDictionaryPropertyPredicate are ignored.
     * @param index the index whose property list is scanned
     * @return a new SAMSequenceDictionary holding one record per matching property (possibly none)
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        final SAMSequenceDictionary result = new SAMSequenceDictionary();
        for (final Map.Entry<String,String> property : index.getProperties().entrySet()) {
            final String key = property.getKey();
            if (!key.startsWith(SequenceDictionaryPropertyPredicate))
                continue;
            // everything after the marker prefix is the contig name; the value is its length
            final String contigName = key.substring(SequenceDictionaryPropertyPredicate.length());
            final int contigLength = Integer.parseInt(property.getValue());
            result.addSequence(new SAMSequenceRecord(contigName, contigLength));
        }
        return result;
    }
    /**
     * Fallback that fills a sequence dictionary from the index's contig names.
     * Each contig is added with a length of 0.
     * @param index the index supplying the sequence names
     * @param dict the dictionary that receives the contigs
     * @return the dict that was passed in, untouched when the index has no sequence name list
     */
    private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        final LinkedHashSet<String> contigNames = index.getSequenceNames();
        if (contigNames != null) {
            for (final String contigName : contigNames)
                dict.addSequence(new SAMSequenceRecord(contigName, 0));
        }
        return dict;
    }
    /**
     * set the sequence dictionary of the track.  This function checks that the contig listing of the underlying file is compatible.
     * (that each contig in the index is in the sequence dictionary).
     * @param inputFile for proper error message formatting.
     * @param dict the sequence dictionary
     * @param index the index file
     * @param indexFile the index file
     * @param rewriteIndex should we rewrite the index when we're done?
     *
     */
-    public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
+    public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
-        if (dict == null) return;
+        if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
        SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
        validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
        // check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
-        for (SAMSequenceRecord seq : currentDict.getSequences()) {
+        final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
-            if (dict.getSequence(seq.getSequenceName()) == null)
+        validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
-                continue;
+
-            index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
+        // actually update the dictionary in the index
-        }
+        IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
        // re-write the index
        if (rewriteIndex) try {
            writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
        } catch (IOException e) {
            logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
        }
    }
-
+    public void validateTrackSequenceDictionary(final String trackName,
-    public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
+                                                final SAMSequenceDictionary trackDict,
-        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
+                                                final SAMSequenceDictionary referenceDict ) {
-        if (trackDict == null || trackDict.size() == 0)
+        IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            Set<String> trackSequences = new TreeSet<String>();
            for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
                trackSequences.add(dictionaryEntry.getSequenceName());
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportColumns.java
@ -24,12 +24,14 @@
 package org.broadinstitute.sting.gatk.report;
 import org.broadinstitute.sting.utils.collections.Pair;
 import java.util.*;
 /**
 * Tracks a linked list of GATKReportColumn in order by name.
 */
-public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
+public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
    private List<String> columnNames = new ArrayList<String>();
    /**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
        columnNames.add(key);
        return super.put(key, value);
    }
    /**
     * Iterates over the columns by walking the recorded column names from index 0 upwards
     * and resolving each position through getByIndex.
     *
     * @return an iterator that does not support removal
     */
    @Override
    public Iterator<GATKReportColumn> iterator() {
        return new Iterator<GATKReportColumn>() {
            int offset = 0;
            public boolean hasNext() { return offset < columnNames.size() ; }
            public GATKReportColumn next() {
                // honor the Iterator contract: an exhausted iterator signals NoSuchElementException
                if ( ! hasNext() )
                    throw new NoSuchElementException("No more columns: requested offset " + offset + " of " + columnNames.size());
                return getByIndex(offset++);
            }
            public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
        };
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportTable.java
@ -286,6 +286,10 @@ public class GATKReportTable {
        }
    }
    /**
     * Checks whether the primary key column contains the given key.
     *
     * @param primaryKey the key to look for
     * @return the result of primaryKeyColumn.contains(primaryKey)
     */
    public boolean containsKey(Object primaryKey) {
        return primaryKeyColumn.contains(primaryKey);
    }
    /**
     * Set the value for a given position in the table
     *
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@ -358,7 +358,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
    public void printOnTraversalDone() {
        printProgress(null, null, true);
-        final double elapsed = timer.getElapsedTime();
+        final double elapsed = timer == null ? 0 : timer.getElapsedTime();
        ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();        
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintRODsWalker.java
@ -26,21 +26,23 @@
 package org.broadinstitute.sting.gatk.walkers;
 import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
 import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
 import java.io.PrintStream;
 import java.util.Iterator;
 /**
 * Prints out all of the RODs in the input data set. Data is rendered using the toString() method
 * of the given ROD.
 */
 public class PrintRODsWalker extends RodWalker<Integer, Integer> {
    @Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
    public RodBinding<Feature> input;
    @Output
    PrintStream out;
@ -62,7 +64,7 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
        if ( tracker == null )
            return 0;
-        for ( Feature feature : tracker.getValues(Feature.class) ) {
+        for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
            out.println(feature.toString());
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
 *   -I input1.bam \
 *   -I input2.bam \
 *   --read_filter MappingQualityZero
 *
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
 *   -T PrintReads \
 *   -o output.bam \
 *   -I input.bam \
 *   -n 2000
 * </pre>
 *
 */
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Walker.java
@ -25,6 +25,7 @@
 package org.broadinstitute.sting.gatk.walkers;
 import net.sf.samtools.SAMSequenceDictionary;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -77,6 +78,15 @@ public abstract class Walker<MapType, ReduceType> {
        return toolkit;
    }
    /**
     * Gets the master sequence dictionary for this walker by asking the toolkit returned from getToolkit().
     *
     * @see GenomeAnalysisEngine#getMasterSequenceDictionary()
     * @return the value returned by the toolkit's getMasterSequenceDictionary()
     */
    protected SAMSequenceDictionary getMasterSequenceDictionary() {
        return getToolkit().getMasterSequenceDictionary();
    }
    /**
     * (conceptual static) method that states whether you want to see reads piling up at a locus
     * that contain a deletion at the locus.
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalance.java
@ -43,6 +43,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
 */
 public class AlleleBalance extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.*;
 /**
 * The allele balance (fraction of ref bases over ref + alt bases) separately for each biallelic het-called sample
 */
 public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AnnotationByDepth.java
@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import java.util.Map;
-
+/**
-
+ * Abstract base class for all annotations that are normalized by depth
 */
 public abstract class AnnotationByDepth extends InfoFieldAnnotation {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseCounts.java
@ -47,6 +47,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Count of A, C, G, T bases across all samples
 */
 public class BaseCounts extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/BaseQualityRankSumTest.java
@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
 import java.util.List;
 /**
 * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
 */
 public class BaseQualityRankSumTest extends RankSumTest {
    public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ChromosomeCounts.java
@ -44,6 +44,11 @@ import java.util.List;
 import java.util.Map;
 /**
 * Allele count in genotypes, for each ALT allele, in the same order as listed;
 * allele Frequency, for each ALT allele, in the same order as listed; total number
 * of alleles in called genotypes.
 */
 public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
    private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthOfCoverage.java
@ -16,7 +16,23 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
 * Total (unfiltered) depth over all samples.
 *
 * This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
 * at this site.  The DP field describes the total depth of reads that passed the Unified Genotyper's internal
 * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
 * The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
 * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
 * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
 * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
 * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
 * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
 * normally be excluded from the statistical calculations going into GQ and QUAL.
 *
 * Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
 * -dcov D is N * D
 */
 public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@ -23,6 +23,25 @@ import java.util.List;
 import java.util.Map;
 /**
 * The depth of coverage of each VCF allele in this sample.
 *
 * This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
 * at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
 * quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
 * The AD values (one for each of the REF and ALT fields) are the counts of all reads that carried with them the
 * REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
 * power I have to determine the genotype of the sample at this site, while the AD tells me how many times
 * I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
 * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
 * to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
 * normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
 * the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
 * are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
 * because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
 * assumptions about the underlying genotype on it</b>; instead, the genotype likelihoods (PLs) are what
 * determine the genotype calls (see below).
 */
 public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
    private static String REF_ALLELE = "REF";
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.*;
 /**
 * Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
 * being seen on only the forward or only the reverse strand) in the reads. More bias is
 * indicative of false positive calls.
 */
 public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
    private static final String FS = "FS";
    private static final double MIN_PVALUE = 1E-320;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/GCContent.java
@ -17,6 +17,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
 */
 public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HaplotypeScore.java
@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
 import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.genotype.Haplotype;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.*;
 /**
 * Consistency of the site with two (and only two) segregating haplotypes. Higher scores
 * are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
 */
 public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
    private final static boolean DEBUG = false;
    private final static int MIN_CONTEXT_WING_SIZE = 10;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HardyWeinberg.java
@ -19,6 +19,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Phred-scaled p-value of the genotype-based (using the GT field) Hardy-Weinberg test for disequilibrium
 */
 public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
    private static final int MIN_SAMPLES = 10;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/HomopolymerRun.java
@ -16,7 +16,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
 * Largest contiguous homopolymer run of the variant allele in either direction on the reference.
 */
 public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
    private boolean ANNOTATE_INDELS = true;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java
@ -17,14 +17,15 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 /**
 * Created by IntelliJ IDEA.
 * User: rpoplin
 * Date: 5/16/11
 */
-// A set of annotations calculated directly from the GLs
+/**
-public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
+ * Likelihood-based (using the PL field) test for inbreeding among samples.
 *
 * A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
 * well with limited coverage per sample.  See the 1000 Genomes Phase I release for
 * more information.
 */
 public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
    private static final int MIN_SAMPLES = 10;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/IndelType.java
@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.*;
 /**
- * Created by IntelliJ IDEA.
+ * Rough category of indel type (insertion, deletion, multi-allelic, other)
 * User: delangel
 * Date: Mar 11, 2011
 * Time: 11:47:33 AM
 * To change this template use File | Settings | File Templates.
 */
 public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/LowMQ.java
@ -17,6 +17,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Triplet annotation: fraction of MAPQ == 0, MAPQ < 10, and count of all mapped reads
 */
 public class LowMQ extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
 import java.util.List;
 /**
 * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
 */
 public class MappingQualityRankSumTest extends RankSumTest {
    public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZero.java
@ -19,6 +19,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Total count across all samples of mapping quality zero reads
 */
 public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java
@ -44,11 +44,7 @@ import java.util.List;
 import java.util.Map;
 /**
- * Created by IntelliJ IDEA.
+ * Count for each sample of mapping quality zero reads
 * User: asivache
 * Date: Feb 4, 2011
 * Time: 6:46:25 PM
 * To change this template use File | Settings | File Templates.
 */
 public class MappingQualityZeroBySample extends GenotypeAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker,
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroFraction.java
@ -17,8 +17,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
-
+ * Fraction of all reads across samples that have mapping quality zero
 */
 public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/NBaseCount.java
@ -17,11 +17,8 @@ import java.util.List;
 import java.util.Map;
 /**
- * Created by IntelliJ IDEA.
+ * The number of N bases, counting only SOLiD data
 * User: rpoplin
 * Date: 5/16/11
 */
 public class NBaseCount extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
        if( stratifiedContexts.size() == 0 )
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/QualByDepth.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
 * Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
 *
 * Low scores are indicative of false positive calls and artifacts.
 */
 public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RMSMappingQuality.java
@ -21,6 +21,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Root Mean Square of the mapping quality of the reads across all samples.
 */
 public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@ -21,7 +21,9 @@ import java.util.List;
 import java.util.Map;
-
+/**
 * Abstract root for all RankSum based annotations
 */
 public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
    static final double INDEL_LIKELIHOOD_THRESH = 0.1;
    static final boolean DEBUG = false;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadDepthAndAllelicFractionBySample.java
@ -25,6 +25,7 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -47,12 +48,9 @@ import java.util.List;
 import java.util.Map;
 /**
- * Created by IntelliJ IDEA.
+ * Unsupported
 * User: asivache
 * Date: Feb 4, 2011
 * Time: 3:59:27 PM
 * To change this template use File | Settings | File Templates.
 */
@Hidden
 public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {
        private static String REF_ALLELE = "REF";
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
 import java.util.List;
 /**
- * Created by IntelliJ IDEA.
+ * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for the distance from the end of the read for reads with the alternate allele; if the alternate allele is only seen near the ends of reads, this is indicative of error.
 * User: rpoplin
 * Date: 3/30/11
 */
 public class ReadPosRankSumTest extends RankSumTest {
    public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SBByDepth.java
@ -15,8 +15,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
-
+ * SB annotation value by depth of alt containing samples
 */
 public class SBByDepth extends AnnotationByDepth {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
@ -26,7 +27,7 @@ public class SBByDepth extends AnnotationByDepth {
        if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
            return null;
-        double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
+        double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
        final Map<String, Genotype> genotypes = vc.getGenotypes();
        if ( genotypes == null || genotypes.size() == 0 )
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SampleList.java
@ -41,7 +41,9 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-
+/**
 * List all of the samples in the info field
 */
 public class SampleList extends InfoFieldAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@ -24,7 +24,9 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -46,134 +45,522 @@ import java.util.*;
 * (http://snpeff.sourceforge.net/).
 *
 * For each variant, chooses one of the effects of highest biological impact from the SnpEff
- * output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
+ * output file (which must be provided on the command line via --snpEffFile filename.vcf),
 * and adds annotations on that effect.
 *
 * The possible biological effects and their associated impacts are defined in the class:
 * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
 *
 * @author David Roazen
 */
 public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
-    // SnpEff annotation key names:
+    private static Logger logger = Logger.getLogger(SnpEff.class);
-    public static final String GENE_ID_KEY = "GENE_ID";
+
-    public static final String GENE_NAME_KEY = "GENE_NAME";
+    // We refuse to parse SnpEff output files generated by unsupported versions, or
-    public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
+    // lacking a SnpEff version number in the VCF header:
-    public static final String EXON_ID_KEY = "EXON_ID";
+    public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
-    public static final String EXON_RANK_KEY = "EXON_RANK";
+    public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
-    public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
+    public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
-    public static final String EFFECT_KEY = "EFFECT";
+
-    public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
+    // When we write the SnpEff version number and command line to the output VCF, we change
-    public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
+    // the key name slightly so that the output VCF won't be confused in the future for an
-    public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
+    // output file produced by SnpEff directly:
-    public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
+    public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
-    public static final String CODON_NUM_KEY = "CODON_NUM";
+    public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
-    public static final String CDS_SIZE_KEY = "CDS_SIZE";
+
    // SnpEff aggregates all effects (and effect metadata) together into a single INFO
    // field annotation with the key EFF:
    public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
    public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
    public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
    /**
     * Key names for the INFO field annotations we will add to each record, together
     * with the parsing-related information attached to each key.
     */
    public enum InfoFieldKey {
        EFFECT_KEY("SNPEFF_EFFECT", -1),
        IMPACT_KEY("SNPEFF_IMPACT", 0),
        CODON_CHANGE_KEY("SNPEFF_CODON_CHANGE", 1),
        AMINO_ACID_CHANGE_KEY("SNPEFF_AMINO_ACID_CHANGE", 2),
        GENE_NAME_KEY("SNPEFF_GENE_NAME", 3),
        GENE_BIOTYPE_KEY("SNPEFF_GENE_BIOTYPE", 4),
        TRANSCRIPT_ID_KEY("SNPEFF_TRANSCRIPT_ID", 6),
        EXON_ID_KEY("SNPEFF_EXON_ID", 7),
        FUNCTIONAL_CLASS_KEY("SNPEFF_FUNCTIONAL_CLASS", -1);

        /** Actual text of the key. */
        private final String keyName;

        /**
         * Index within the effect metadata subfields of the SnpEff EFF annotation at which
         * this key's value can be found during parsing.
         * NOTE(review): -1 presumably marks keys whose value is not read from a subfield -- confirm.
         */
        private final int fieldIndex;

        InfoFieldKey ( String name, int index ) {
            keyName = name;
            fieldIndex = index;
        }

        /** @return the text of this key as written to the INFO field */
        public String getKeyName() {
            return keyName;
        }

        /** @return the subfield index associated with this key */
        public int getFieldIndex() {
            return fieldIndex;
        }
    }
    /**
     * Possible SnpEff biological effects. All effect names found in the SnpEff input file
     * are validated against this list.
     *
     * Every effect records the functional class assigned to it and whether it is
     * treated as a modifier.
     */
    public enum EffectType {
        // High-impact effects:
        FRAME_SHIFT(EffectFunctionalClass.NONE, false),
        STOP_GAINED(EffectFunctionalClass.NONSENSE, false),
        START_LOST(EffectFunctionalClass.NONE, false),
        SPLICE_SITE_ACCEPTOR(EffectFunctionalClass.NONE, false),
        SPLICE_SITE_DONOR(EffectFunctionalClass.NONE, false),
        EXON_DELETED(EffectFunctionalClass.NONE, false),
        STOP_LOST(EffectFunctionalClass.NONE, false),

        // Moderate-impact effects:
        NON_SYNONYMOUS_CODING(EffectFunctionalClass.MISSENSE, false),
        CODON_CHANGE(EffectFunctionalClass.NONE, false),
        CODON_INSERTION(EffectFunctionalClass.NONE, false),
        CODON_CHANGE_PLUS_CODON_INSERTION(EffectFunctionalClass.NONE, false),
        CODON_DELETION(EffectFunctionalClass.NONE, false),
        CODON_CHANGE_PLUS_CODON_DELETION(EffectFunctionalClass.NONE, false),
        UTR_5_DELETED(EffectFunctionalClass.NONE, false),
        UTR_3_DELETED(EffectFunctionalClass.NONE, false),

        // Low-impact effects:
        SYNONYMOUS_CODING(EffectFunctionalClass.SILENT, false),
        SYNONYMOUS_START(EffectFunctionalClass.SILENT, false),
        NON_SYNONYMOUS_START(EffectFunctionalClass.SILENT, false),
        SYNONYMOUS_STOP(EffectFunctionalClass.SILENT, false),
        NON_SYNONYMOUS_STOP(EffectFunctionalClass.SILENT, false),
        START_GAINED(EffectFunctionalClass.NONE, false),

        // Modifiers:
        NONE(EffectFunctionalClass.NONE, true),
        CHROMOSOME(EffectFunctionalClass.NONE, true),
        INTERGENIC(EffectFunctionalClass.NONE, true),
        UPSTREAM(EffectFunctionalClass.NONE, true),
        UTR_5_PRIME(EffectFunctionalClass.NONE, true),
        CDS(EffectFunctionalClass.NONE, true),
        GENE(EffectFunctionalClass.NONE, true),
        TRANSCRIPT(EffectFunctionalClass.NONE, true),
        EXON(EffectFunctionalClass.NONE, true),
        INTRON(EffectFunctionalClass.NONE, true),
        UTR_3_PRIME(EffectFunctionalClass.NONE, true),
        DOWNSTREAM(EffectFunctionalClass.NONE, true),
        INTRON_CONSERVED(EffectFunctionalClass.NONE, true),
        INTERGENIC_CONSERVED(EffectFunctionalClass.NONE, true),
        REGULATION(EffectFunctionalClass.NONE, true),
        CUSTOM(EffectFunctionalClass.NONE, true),
        WITHIN_NON_CODING_GENE(EffectFunctionalClass.NONE, true);

        /** Functional class assigned to this effect. */
        private final EffectFunctionalClass functionalClass;

        /** True for the effects listed under "Modifiers" above. */
        private final boolean isModifier;

        EffectType ( EffectFunctionalClass effectClass, boolean modifier ) {
            functionalClass = effectClass;
            isModifier = modifier;
        }

        /** @return the functional class assigned to this effect */
        public EffectFunctionalClass getFunctionalClass() {
            return functionalClass;
        }

        /** @return true if this effect is classified as a modifier */
        public boolean isModifier() {
            return isModifier;
        }
    }
    /**
     * Impact level of an effect. SnpEff labels each effect as either LOW, MODERATE, or HIGH
     * impact; we take the additional step of classifying some of the LOW impact effects
     * as MODIFIERs.
     */
    public enum EffectImpact {
        MODIFIER(0),
        LOW(1),
        MODERATE(2),
        HIGH(3);

        /** Numeric rank used to compare impacts; a larger value means a more severe impact. */
        private final int severityRating;

        EffectImpact ( int rating ) {
            severityRating = rating;
        }

        /** @return true if this impact is strictly more severe than {@code other} */
        public boolean isHigherImpactThan ( EffectImpact other ) {
            return severityRating > other.severityRating;
        }

        /** @return true if this impact has the same severity rating as {@code other} */
        public boolean isSameImpactAs ( EffectImpact other ) {
            return severityRating == other.severityRating;
        }
    }
    /**
     * Coding status of an effect. SnpEff labels most effects as either CODING or
     * NON_CODING, but sometimes omits this information.
     */
    public enum EffectCoding {
        CODING, NON_CODING, UNKNOWN
    }
    /**
     * Functional class that we assign to each SnpEff effect.
     */
    public enum EffectFunctionalClass {
        NONE(0),
        SILENT(1),
        MISSENSE(2),
        NONSENSE(3);

        /** Numeric rank used to compare functional classes; a larger value wins. */
        private final int priority;

        EffectFunctionalClass ( int rank ) {
            priority = rank;
        }

        /** @return true if this functional class has a strictly greater priority than {@code other} */
        public boolean isHigherPriorityThan ( EffectFunctionalClass other ) {
            return priority > other.priority;
        }
    }
    /**
     * Validates the SnpEff input before annotation starts and records its provenance in the
     * output header.
     *
     * Checks that a SnpEff rod is bound, that the rod's VCF header carries the SnpEff version
     * and command-line entries, and that the version is supported. On success, both entries
     * are added to headerLines under the "Original"-prefixed key names.
     *
     * @param walker      walker that supplies the SnpEff rod binding
     * @param toolkit     engine used to look up the VCF header of the SnpEff rod
     * @param headerLines header lines of the output VCF; two lines are added to this set
     * @throws UserException if the rod binding, version line, or command line fails validation
     */
    public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
        // Guard against the user specifying -A SnpEff without providing a SnpEff rod via --snpEffFile:
        validateRodBinding(walker.getSnpEffRodBinding());
        final RodBinding<VariantContext> binding = walker.getSnpEffRodBinding();

        // Pull the version and command-line entries out of the VCF header of the SnpEff rod:
        final VCFHeader rodHeader = VCFUtils
                .getVCFHeadersFromRods(toolkit, Arrays.asList(binding.getName()))
                .get(binding.getName());
        final VCFHeaderLine versionLine = rodHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
        final VCFHeaderLine commandLine = rodHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);

        // Both entries must be present, and the file must come from a supported version of SnpEff:
        checkSnpEffVersion(versionLine);
        checkSnpEffCommandLine(commandLine);

        // Everything looks ok: copy both entries into the output header under changed key names,
        // so that our output file won't be mistaken in the future for a SnpEff output file:
        headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, versionLine.getValue()));
        headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, commandLine.getValue()));
    }
    public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
-        RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
+        RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
        validateRodBinding(snpEffRodBinding);
-        List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
+        // Get only SnpEff records that start at this locus, not merely span it:
        List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
-        // Add only annotations for one of the most biologically-significant effects as defined in
+        // Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
-        // the SnpEffConstants class:
+        // If there is more than one such record, we only need to pick the first one, since the biological
-        SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
+        // effects will be the same across all such records:
-
+        VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
-        if ( mostSignificantEffect == null ) {
+        if ( matchingRecord == null ) {
            return null;
        }
-        return generateAnnotations(mostSignificantEffect);
+        // Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
        List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
        if ( effects.size() == 0 ) {
            return null;
        }
        // Add only annotations for one of the most biologically-significant effects from this set:
        SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
        return mostSignificantEffect.getAnnotations();
    }
-    private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
+    private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
        if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
-            throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
+            throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
-                                    "as a rodbinding on the command line, but no SnpEff rodbinding was found.");
+                                    "as a rodbinding on the command line via the --snpEffFile option, but " +
                                    "no SnpEff rodbinding was found.");
        }
    }
-    private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
+    private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
-        SnpEffFeature mostSignificantEffect = null;
+        if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
            throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
                                    "input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
                                    Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
        }
-        for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
+        String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
        if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
            throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
                                    "is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
        }
    }
    /**
     * Verifies that the SnpEff input file's VCF header carries a non-empty command-line entry.
     *
     * @param snpEffCommandLine the header line expected to hold the SnpEff command line; may be null
     * @throws UserException if the line is null, or its value is null or blank after trimming
     */
    private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
        final boolean commandLinePresent = snpEffCommandLine != null &&
                                           snpEffCommandLine.getValue() != null &&
                                           snpEffCommandLine.getValue().trim().length() != 0;
        if ( commandLinePresent ) {
            return;
        }

        throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
                                "input file, which should be added by all supported versions of SnpEff (" +
                                Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
    }
    /**
     * Tests whether the given version string exactly equals one of the entries
     * in SUPPORTED_SNPEFF_VERSIONS.
     *
     * @param versionString the SnpEff version string to look up
     * @return true if an equal entry exists, false otherwise
     */
    private boolean isSupportedSnpEffVersion ( String versionString ) {
        boolean supported = false;
        // Stop scanning as soon as the first equal entry is found:
        for ( int i = 0; i < SUPPORTED_SNPEFF_VERSIONS.length && ! supported; i++ ) {
            supported = SUPPORTED_SNPEFF_VERSIONS[i].equals(versionString);
        }
        return supported;
    }
    /**
     * Finds the first SnpEff record that has the same alternate alleles and the same
     * reference allele as the given variant.
     *
     * @param snpEffRecords candidate SnpEff records to search, in order
     * @param vc the variant to match against
     * @return the first matching record, or null if none of the candidates match
     */
    private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
        for ( VariantContext candidate : snpEffRecords ) {
            // The alternate alleles must agree before the reference allele is even compared:
            if ( ! candidate.hasSameAlternateAllelesAs(vc) ) {
                continue;
            }
            if ( candidate.getReference().equals(vc.getReference()) ) {
                return candidate;
            }
        }
        return null;
    }
    /**
     * Parses the SnpEff INFO field of a record into individual effect objects.
     * Effects that cannot be split into a name and a metadata section, or that are
     * reported as not well-formed by SnpEffEffect, are logged as warnings and left out.
     *
     * @param snpEffRecord the SnpEff record whose effect field should be parsed
     * @return the well-formed effects found; empty if the record has no effect field
     */
    private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
        final List<SnpEffEffect> wellFormedEffects = new ArrayList<SnpEffEffect>();

        final Object rawEffectField = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
        if ( rawEffectField == null ) {
            return wellFormedEffects;
        }

        // The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
        // Either can occur here, since a record may list one effect or several, so normalize to a list.
        final List<String> effectStrings = ( rawEffectField instanceof List )
                                           ? (List<String>)rawEffectField
                                           : Arrays.asList((String)rawEffectField);

        for ( String rawEffect : effectStrings ) {
            final String[] nameAndMetadata = rawEffect.split(SNPEFF_EFFECT_METADATA_DELIMITER);

            if ( nameAndMetadata.length == 2 ) {
                // A limit of -1 keeps trailing empty subfields, so the metadata field count stays accurate:
                final String[] metadataFields = nameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
                final SnpEffEffect candidate = new SnpEffEffect(nameAndMetadata[0], metadataFields);

                if ( candidate.isWellFormed() ) {
                    wellFormedEffects.add(candidate);
                }
                else {
                    logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
                                              snpEffRecord.getChr(), snpEffRecord.getStart(), candidate.getParseError(), rawEffect));
                }
            }
            else {
                logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
                                          snpEffRecord.getChr(), snpEffRecord.getStart(), rawEffect));
            }
        }

        return wellFormedEffects;
    }
    private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
        SnpEffEffect mostSignificantEffect = null;
        for ( SnpEffEffect effect : effects ) {
            if ( mostSignificantEffect == null ||
-                 snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
+                 effect.isHigherImpactThan(mostSignificantEffect) ) {
-                mostSignificantEffect = snpEffFeature;
+                mostSignificantEffect = effect;
            }
        }
        return mostSignificantEffect;
    }
    /**
     * Builds the insertion-ordered map of INFO annotations for the given effect.
     * Optional fields are added only when the effect reports having them; the effect
     * and effect-impact entries are always added.
     *
     * @param mostSignificantEffect the effect whose fields should be turned into annotations
     * @return a map from annotation key to annotation value
     */
    private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
        final Map<String, Object> result = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));

        // Gene / transcript / exon identifiers:
        if ( mostSignificantEffect.hasGeneID() ) {
            result.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
        }
        if ( mostSignificantEffect.hasGeneName() ) {
            result.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
        }
        if ( mostSignificantEffect.hasTranscriptID() ) {
            result.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
        }
        if ( mostSignificantEffect.hasExonID() ) {
            result.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
        }
        if ( mostSignificantEffect.hasExonRank() ) {
            result.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
        }

        // Stored with a null value: only the key's presence is recorded.
        if ( mostSignificantEffect.isNonCodingGene() ) {
            result.put(WITHIN_NON_CODING_GENE_KEY, null);
        }

        // The effect and its impact are added unconditionally:
        result.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
        result.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());

        // Remaining optional details:
        if ( mostSignificantEffect.hasEffectExtraInformation() ) {
            result.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
        }
        if ( mostSignificantEffect.hasOldAndNewAA() ) {
            result.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
        }
        if ( mostSignificantEffect.hasOldAndNewCodon() ) {
            result.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
        }
        if ( mostSignificantEffect.hasCodonNum() ) {
            result.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
        }
        if ( mostSignificantEffect.hasCdsSize() ) {
            result.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
        }

        return result;
    }
    public List<String> getKeyNames() {
-        return Arrays.asList( GENE_ID_KEY,
+        return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
-                              GENE_NAME_KEY,
+                              InfoFieldKey.IMPACT_KEY.getKeyName(),
-                              TRANSCRIPT_ID_KEY,
+                              InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
-                              EXON_ID_KEY,
+                              InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
-                              EXON_RANK_KEY,
+                              InfoFieldKey.GENE_NAME_KEY.getKeyName(),
-                              WITHIN_NON_CODING_GENE_KEY,
+                              InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
-                              EFFECT_KEY,
+                              InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
-                              EFFECT_IMPACT_KEY,
+                              InfoFieldKey.EXON_ID_KEY.getKeyName(),
-                              EFFECT_EXTRA_INFORMATION_KEY,
+                              InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
                              OLD_NEW_AA_KEY,
                              OLD_NEW_CODON_KEY,
                              CODON_NUM_KEY,
                              CDS_SIZE_KEY
                            );
    }
    public List<VCFInfoHeaderLine> getDescriptions() {
        return Arrays.asList(
-            new VCFInfoHeaderLine(GENE_ID_KEY,                  1, VCFHeaderLineType.String,  "Gene ID for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(),            1, VCFHeaderLineType.String,  "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
-            new VCFInfoHeaderLine(GENE_NAME_KEY,                1, VCFHeaderLineType.String,  "Gene name for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(),            1, VCFHeaderLineType.String,  "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
-            new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY,            1, VCFHeaderLineType.String,  "Transcript ID for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),      1, VCFHeaderLineType.String,  "Old/New codon for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(EXON_ID_KEY,                  1, VCFHeaderLineType.String,  "Exon ID for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String,  "Old/New amino acid for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(EXON_RANK_KEY,                1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(),         1, VCFHeaderLineType.String,  "Gene name for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY,   0, VCFHeaderLineType.Flag,    "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
+            new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),      1, VCFHeaderLineType.String,  "Gene biotype for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(EFFECT_KEY,                   1, VCFHeaderLineType.String,  "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
+            new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),     1, VCFHeaderLineType.String,  "Transcript ID for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(EFFECT_IMPACT_KEY,            1, VCFHeaderLineType.String,  "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
+            new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(),           1, VCFHeaderLineType.String,  "Exon ID for the highest-impact effect resulting from the current variant"),
-            new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String,  "Additional information about the highest-impact effect resulting from the current variant"),
+            new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),  1, VCFHeaderLineType.String,  "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
            new VCFInfoHeaderLine(OLD_NEW_AA_KEY,               1, VCFHeaderLineType.String,  "Old/New amino acid for the highest-impact effect resulting from the current variant"),
            new VCFInfoHeaderLine(OLD_NEW_CODON_KEY,            1, VCFHeaderLineType.String,  "Old/New codon for the highest-impact effect resulting from the current variant"),
            new VCFInfoHeaderLine(CODON_NUM_KEY,                1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
            new VCFInfoHeaderLine(CDS_SIZE_KEY,                 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
        );
    }
    /**
     * Helper class to parse, validate, and store a single SnpEff effect and its metadata.
     *
     * Recognized parsing problems are recorded rather than thrown: they mark the instance as
     * not well-formed, and the first such problem is kept for retrieval via getParseError().
     */
    protected static class SnpEffEffect {
        // Values parsed from the effect name and its metadata subfields:
        private EffectType effect;
        private EffectImpact impact;
        private String codonChange;
        private String aminoAcidChange;
        private String geneName;
        private String geneBiotype;
        private EffectCoding coding;
        private String transcriptID;
        private String exonID;

        // Parse status:
        private String parseError = null;
        private boolean isWellFormed = true;

        private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
        private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
        private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;

        // Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
        // errors come after warnings, not vice versa:
        private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
        private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;

        private static final int SNPEFF_CODING_FIELD_INDEX = 5;

        /**
         * Parses the effect name first and the metadata subfields second.
         *
         * @param effectName name of the effect, expected to be the name of an EffectType constant
         * @param effectMetadata the metadata subfields that accompany the effect
         */
        public SnpEffEffect ( String effectName, String[] effectMetadata ) {
            parseEffectName(effectName);
            parseEffectMetadata(effectMetadata);
        }

        /**
         * Resolves the effect name to an EffectType, recording a parse error if it is not recognized.
         */
        private void parseEffectName ( String effectName ) {
            try {
                effect = EffectType.valueOf(effectName);
            }
            catch ( IllegalArgumentException unrecognizedName ) {
                parseError(String.format("%s is not a recognized effect type", effectName));
            }
        }

        /**
         * Validates the number of metadata subfields and, if it is the expected number,
         * stores each subfield. An unexpected count is recorded as a parse error and
         * nothing else is parsed.
         */
        private void parseEffectMetadata ( String[] effectMetadata ) {
            final int fieldCount = effectMetadata.length;

            // One extra field carries a SnpEff warning, two extra fields carry a SnpEff error:
            if ( fieldCount == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
                parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
                return;
            }
            if ( fieldCount == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
                parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
                return;
            }
            if ( fieldCount != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
                parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
                                         EXPECTED_NUMBER_OF_METADATA_FIELDS, fieldCount));
                return;
            }

            parseImpact(effectMetadata);

            codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
            aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
            geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
            geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];

            parseCoding(effectMetadata);

            transcriptID = effectMetadata[InfoFieldKey.TRANSCRIPT_ID_KEY.getFieldIndex()];
            exonID = effectMetadata[InfoFieldKey.EXON_ID_KEY.getFieldIndex()];
        }

        /**
         * Sets the impact. Effects flagged as modifiers always get the MODIFIER impact;
         * otherwise the impact subfield is parsed, recording a parse error if unrecognized.
         */
        private void parseImpact ( String[] effectMetadata ) {
            if ( effect != null && effect.isModifier() ) {
                impact = EffectImpact.MODIFIER;
                return;
            }

            final String rawImpact = effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()];
            try {
                impact = EffectImpact.valueOf(rawImpact);
            }
            catch ( IllegalArgumentException unrecognizedImpact ) {
                parseError(String.format("Unrecognized value for effect impact: %s", rawImpact));
            }
        }

        /**
         * Sets the coding status. A blank coding subfield maps to UNKNOWN; otherwise the
         * subfield is parsed, recording a parse error if unrecognized.
         */
        private void parseCoding ( String[] effectMetadata ) {
            final String rawCoding = effectMetadata[SNPEFF_CODING_FIELD_INDEX];
            if ( rawCoding.trim().length() == 0 ) {
                coding = EffectCoding.UNKNOWN;
                return;
            }

            try {
                coding = EffectCoding.valueOf(rawCoding);
            }
            catch ( IllegalArgumentException unrecognizedCoding ) {
                parseError(String.format("Unrecognized value for effect coding: %s", rawCoding));
            }
        }

        /**
         * Marks this effect as not well-formed, keeping only the first error message reported.
         */
        private void parseError ( String message ) {
            isWellFormed = false;

            // Later errors never overwrite the first one:
            if ( parseError == null ) {
                parseError = message;
            }
        }

        /**
         * @return true if no parse error has been recorded for this effect
         */
        public boolean isWellFormed() {
            return isWellFormed;
        }

        /**
         * @return the first parse error recorded, or the empty string if there was none
         */
        public String getParseError() {
            if ( parseError == null ) {
                return "";
            }
            return parseError;
        }

        /**
         * @return true if the coding status of this effect is CODING
         */
        public boolean isCoding() {
            return coding == EffectCoding.CODING;
        }

        /**
         * Decides whether this effect outranks another one.
         *
         * @param other the effect to compare against
         * @return true if this effect is considered higher-impact than the other effect
         */
        public boolean isHigherImpactThan ( SnpEffEffect other ) {
            final boolean thisIsCoding = isCoding();
            final boolean otherIsCoding = other.isCoding();

            // If exactly one of the two effects is within a coding gene, that effect has the higher impact:
            if ( thisIsCoding != otherIsCoding ) {
                return thisIsCoding;
            }

            // Both effects are in, or both are not in, a coding gene, so the impacts of the
            // effects themselves decide:
            if ( impact.isHigherImpactThan(other.impact) ) {
                return true;
            }

            // Equal impacts are tie-broken using the functional class of the effect:
            if ( impact.isSameImpactAs(other.impact) ) {
                return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
            }

            return false;
        }

        /**
         * Builds the insertion-ordered map of annotations for this effect. Keys whose
         * values are null or blank are left out.
         *
         * @return a map from INFO key name to value
         */
        public Map<String, Object> getAnnotations() {
            final Map<String, Object> result = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));

            addAnnotation(result, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
            addAnnotation(result, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
            addAnnotation(result, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
            addAnnotation(result, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
            addAnnotation(result, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
            addAnnotation(result, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
            addAnnotation(result, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
            addAnnotation(result, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
            addAnnotation(result, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());

            return result;
        }

        /**
         * Stores the key/value pair in the map unless the value is null or blank.
         */
        private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
            // Keys associated with empty values are skipped:
            if ( keyValue == null || keyValue.trim().length() == 0 ) {
                return;
            }
            annotations.put(keyName, keyValue);
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SpanningDeletions.java
@ -17,6 +17,9 @@ import java.util.List;
 import java.util.Map;
 /**
 * Fraction of reads containing spanning deletions at this site.
 */
 public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/TechnologyComposition.java
@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
 import java.util.Map;
 /**
- * Created by IntelliJ IDEA.
+ * Counts of bases from SLX, 454, and SOLiD at this site
 * User: delangel
 * Date: 6/29/11
 * Time: 3:14 PM
 * To change this template use File | Settings | File Templates.
 */
@Hidden
 public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
    private String nSLX = "NumSLX";
    private String n454 ="Num454";
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
 import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -86,14 +85,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @ArgumentCollection
    protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
    public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
    /**
     * The INFO field will be annotated with information on the most biologically-significant effect
     * listed in the SnpEff output file for each variant.
     */
    @Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
-    public RodBinding<SnpEffFeature> snpEffFile;
+    public RodBinding<VariantContext> snpEffFile;
-    public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
+    public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
    /**
      * rsIDs from this file are used to populate the ID column of the output.  Also, the DB INFO flag will be set when appropriate.
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
    @Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
    protected boolean indelsOnly = false;
    @Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
    public String familyStr = null;
    @Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
    public double minGenotypeQualityP = 0.0;
    private VariantAnnotatorEngine engine;
    private Collection<VariantContext> indelBufferContext;
@ -203,9 +209,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
        }
        if ( USE_ALL_ANNOTATIONS )
-            engine = new VariantAnnotatorEngine(this);
+            engine = new VariantAnnotatorEngine(this, getToolkit());
        else
-            engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
+            engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
        engine.initializeExpressions(expressionsToUse);
        // setup the header fields
@ -217,6 +223,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
                hInfo.add(line);
        }
        engine.invokeAnnotationInitializationMethods(hInfo);
        VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
        vcfWriter.writeHeader(vcfHeader);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@ -26,13 +26,11 @@
 package org.broadinstitute.sting.gatk.walkers.annotator;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -49,6 +47,7 @@ public class VariantAnnotatorEngine {
    private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
    private AnnotatorCompatibleWalker walker;
    private GenomeAnalysisEngine toolkit;
    private static class VAExpression {
@ -74,16 +73,18 @@ public class VariantAnnotatorEngine {
    }
    // use this constructor if you want all possible annotations
-    public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
+    public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
        this.walker = walker;
        this.toolkit = toolkit;
        requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
        requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
        initializeDBs();
    }
    // use this constructor if you want to select specific annotations (and/or interfaces)
-    public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
+    public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
        this.walker = walker;
        this.toolkit = toolkit;
        initializeAnnotations(annotationGroupsToUse, annotationsToUse);
        initializeDBs();
    }
@ -113,6 +114,16 @@ public class VariantAnnotatorEngine {
            dbAnnotations.put(rod, rod.getName());
    }
    /**
     * Calls initialize(...) on every requested annotation, passing this engine's walker
     * and toolkit along with the supplied header lines. INFO-field annotations are
     * initialized first, followed by genotype annotations.
     *
     * @param headerLines header lines handed unchanged to each annotation's initialize method
     */
    public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
        // First pass: the requested INFO-field annotations.
        for ( VariantAnnotatorAnnotation infoAnnotation : requestedInfoAnnotations ) {
            infoAnnotation.initialize(walker, toolkit, headerLines);
        }

        // Second pass: the requested genotype annotations.
        for ( VariantAnnotatorAnnotation genotypeAnnotation : requestedGenotypeAnnotations ) {
            genotypeAnnotation.initialize(walker, toolkit, headerLines);
        }
    }
    public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
        Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
@ -1,7 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.List;
@ -9,7 +8,8 @@ import java.util.List;
 public interface AnnotatorCompatibleWalker {
    // getter methods for various used bindings
-    public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
+    public abstract RodBinding<VariantContext> getVariantRodBinding();
    public abstract RodBinding<VariantContext> getSnpEffRodBinding();
    public abstract RodBinding<VariantContext> getDbsnpRodBinding();
    public abstract List<RodBinding<VariantContext>> getCompRodBindings();
    public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java
@ -24,18 +24,18 @@
 package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.List;
-import java.util.Map;
+import java.util.Set;
/**
 * Abstract base class for VariantAnnotator annotations. Subclasses must report the
 * keys they produce and may optionally override the initialization hook.
 */
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
 public abstract class VariantAnnotatorAnnotation {
    /**
     * Returns the INFO keys for this annotation.
     *
     * @return the list of key names
     */
    public abstract List<String> getKeyNames();
    /**
     * Initialization hook. Optional for subclasses, and therefore non-abstract:
     * this default implementation does nothing.
     *
     * @param walker the annotator-compatible walker passed in by the caller
     * @param toolkit the engine passed in by the caller
     * @param headerLines the VCF header lines passed in by the caller
     */
    public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/beagle/BeagleOutputToVCFWalker.java
@ -175,21 +175,16 @@ public class BeagleOutputToVCFWalker  extends RodWalker<Integer, Integer> {
        }
        BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2);
        // ignore places where we don't have a variant
        if ( beagleR2Feature == null )
            return 0;
        BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs);
        // ignore places where we don't have a variant
        if ( beagleProbsFeature == null )
            return 0;
        BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased);
        // ignore places where we don't have a variant
-        if ( beaglePhasedFeature == null )
+        if ( beagleR2Feature == null || beagleProbsFeature == null ||  beaglePhasedFeature == null)
-            return 0;
+        {
           vcfWriter.add(vc_input);
           return 1;
        }
        // get reference base for current position
        byte refByte = ref.getBase();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverageWalker.java
@ -63,20 +63,32 @@ import java.util.*;
 * <h2>Input</h2>
 * <p>
 * One or more bam files (with proper headers) to be analyzed for coverage statistics
 * (Optional) A REFSEQ Rod to aggregate coverage to the gene level
 * </p>
- *
+ * <p>
 *(Optional) A REFSEQ Rod to aggregate coverage to the gene level
 * <p>
 * (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
 *</p></p>
 * <h2>Output</h2>
 * <p>
 * Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
 * </p><p>
 *  - no suffix: per locus coverage
 * </p><p>
 *  - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
 * </p><p>
 *  - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
 * </p><p>
 *  - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
 * </p><p>
 *  - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >= Y samples
 * </p><p>
 *  - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
 * </p><p>
 *  - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
 * </p><p>
 *  - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
 * </p><p>
 *  - _cumulative_coverage_proportions: proportions of loci with >= X coverage, aggregated over all bases
 * </p>
 *
@ -84,7 +96,7 @@ import java.util.*;
 * <pre>
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
- *   -T VariantEval \
+ *   -T DepthOfCoverage \
 *   -o file_name_base \
 *   -I input_bams.list
 *   [-geneList refSeq.sorted.txt] \
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceWalker.java
@ -43,8 +43,10 @@ import java.util.List;
 * Generates an alternative reference sequence over the specified interval.
 *
 * <p>
- * Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
+ * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
- * Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
+ * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
 * Note that if there are multiple variants at a site, it takes the first one seen.
 * Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
 *
 * <h2>Input</h2>
 * <p>
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaReferenceWalker.java
@ -42,6 +42,9 @@ import java.io.PrintStream;
 *
 * <p>
 * The output format can be partially controlled using the provided command-line arguments.
 * Specify intervals with the usual -L argument to output only the reference bases within your intervals.
 * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
 * separate fasta sequence (named numerically in order).
 *
 * <h2>Input</h2>
 * <p>
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidGenotype.java
@ -23,7 +23,7 @@
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
-package org.broadinstitute.sting.utils.genotype;
+package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.utils.BaseUtils;
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
 * Time: 6:46:09 PM
 * To change this template use File | Settings | File Templates.
 */
-public enum DiploidGenotype {
+enum DiploidGenotype {
    AA ('A', 'A'),
    AC ('A', 'C'),
    AG ('A', 'G'),
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidIndelGenotypePriors.java
@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
 /**
 * Created by IntelliJ IDEA.
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypeLikelihoods.java
@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
 import org.broadinstitute.sting.utils.pileup.FragmentPileup;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -276,8 +275,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
        if ( elt.isReducedRead() ) {
            // reduced read representation
            byte qual = elt.getReducedQual();
-            add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
+            if ( BaseUtils.isRegularBase( elt.getBase() )) {
-            return elt.getReducedCount(); // we added nObs bases here
+                add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
                return elt.getReducedCount(); // we added nObs bases here
            } else // odd bases or deletions => don't use them
                return 0;
        } else {
            byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
            return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/DiploidSNPGenotypePriors.java
@ -26,7 +26,6 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
 import java.util.Arrays;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java
@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
    // code for testing purposes
    //
    private final static boolean DEBUG = false;
    private final static boolean PRINT_LIKELIHOODS = false;
    private final static int N_CYCLES = 1;
    private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
    private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
    private final static boolean COMPARE_TO_GS = false;
    public enum ExactCalculation {
        N2_GOLD_STANDARD,
        LINEAR_EXPERIMENTAL
    }
    private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
    private final boolean SIMPLE_GREEDY_GENOTYPER = false;
    private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
    private boolean SIMPLE_GREEDY_GENOTYPER = false;
    final private ExactCalculation calcToUse;
    protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
        super(UAC, N, logger, verboseWriter);
        calcToUse = UAC.EXACT_CALCULATION_TYPE;
    }
    public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                                Map<String, Genotype> GLs, Set<Allele>alleles,
                                double[] log10AlleleFrequencyPriors,
                                double[] log10AlleleFrequencyPosteriors) {
-        // todo -- REMOVE ME AFTER TESTING
+        final int numAlleles = alleles.size();
-        // todo -- REMOVE ME AFTER TESTING
+        final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
-        // todo -- REMOVE ME AFTER TESTING
+        final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
        double[] gsPosteriors;
        if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
            gsPosteriors = log10AlleleFrequencyPosteriors.clone();
        int idxAA = GenotypeType.AA.ordinal();
        int idxAB = GenotypeType.AB.ordinal();
        int idxBB = GenotypeType.BB.ordinal();
        // todo -- remove me after testing
        if ( N_CYCLES > 1 ) {
            for ( int i = 0; i < N_CYCLES; i++) {
                timerGS.restart();
                linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
                timerGS.stop();
                timerExpt.restart();
                linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
                timerExpt.stop();
            }
            System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
                    timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
        }
        int lastK = -1;
        int numAlleles = alleles.size();
        int idxDiag = numAlleles;
        int incr = numAlleles - 1;
        double[][] posteriorCache = new double[numAlleles-1][];
        double[] bestAFguess = new double[numAlleles-1];
        for (int k=1; k < numAlleles; k++) {
            // multi-allelic approximation, part 1: Ideally
            // for each alt allele compute marginal (suboptimal) posteriors -
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
            // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
            // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
-            idxAA = 0;
+            final int idxAA = 0;
-            idxAB = k;
+            final int idxAB = k;
            // yy is always element on the diagonal.
            // 2 alleles: BBelement 2
            // 3 alleles: BB element  3. CC element 5
            // 4 alleles:
-            idxBB = idxDiag;
+            final int idxBB = idxDiag;
            idxDiag += incr--;
-            // todo - possible cleanup
+            final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
-            switch ( calcToUse ) {
+
                case N2_GOLD_STANDARD:
                    lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
                    break;
                case LINEAR_EXPERIMENTAL:
                    lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
                    break;
            }
            if (numAlleles > 2) {
                posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
                bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
@ -153,47 +100,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
        }
        // todo -- REMOVE ME AFTER TESTING
        // todo -- REMOVE ME AFTER TESTING
        // todo -- REMOVE ME AFTER TESTING
        if ( COMPARE_TO_GS ) {
            gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
            double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
            double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
            boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
            if ( ! eq || PRINT_LIKELIHOODS ) {
                System.out.printf("----------------------------------------%n");
                for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
                    double x = log10AlleleFrequencyPosteriors[k];
                    System.out.printf("  %d\t%.2f\t%.2f\t%b%n", k,
                            x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
                            log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
                }
                System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
                        ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
            }
        }
    }
-    private static final double[][] getGLs(Map<String, Genotype> GLs) {
+    private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
-        double[][] genotypeLikelihoods = new double[GLs.size()+1][];
+        ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
-        int j = 0;
+        genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
        for ( Genotype sample : GLs.values() ) {
            j++;
            if ( sample.hasLikelihoods() ) {
-                //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
+                double[] gls = sample.getLikelihoods().getAsVector();
-                genotypeLikelihoods[j] = sample.getLikelihoods().getAsVector();
+
                if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
                    genotypeLikelihoods.add(gls);
            }
        }
        return genotypeLikelihoods;
    }
    // -------------------------------------------------------------------------------------
    //
    // Linearized, ~O(N), implementation.
@ -237,90 +162,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
    }
    // now with banding
    public int linearExactBanded(Map<String, Genotype> GLs,
                                 double[] log10AlleleFrequencyPriors,
                                 double[] log10AlleleFrequencyPosteriors) {
        throw new NotImplementedException();
 //        final int numSamples = GLs.size();
 //        final int numChr = 2*numSamples;
 //        final double[][] genotypeLikelihoods = getGLs(GLs);
 //
 //        final ExactACCache logY = new ExactACCache(numSamples+1);
 //        logY.getkMinus0()[0] = 0.0; // the zero case
 //
 //        double maxLog10L = Double.NEGATIVE_INFINITY;
 //        boolean done = false;
 //        int lastK = -1;
 //        final int BAND_SIZE = 10;
 //
 //        for (int k=0; k <= numChr && ! done; k++ ) {
 //            final double[] kMinus0 = logY.getkMinus0();
 //            int jStart = Math.max(k - BAND_SIZE, 1);
 //            int jStop = Math.min(k + BAND_SIZE, numSamples);
 //
 //            if ( k == 0 ) { // special case for k = 0
 //                for ( int j=1; j <= numSamples; j++ ) {
 //                    kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
 //                }
 //            } else { // k > 0
 //                final double[] kMinus1 = logY.getkMinus1();
 //                final double[] kMinus2 = logY.getkMinus2();
 //                Arrays.fill(kMinus0,0);
 //
 //                for ( int j = jStart; j <= jStop; j++ ) {
 //                    final double[] gl = genotypeLikelihoods[j];
 //                    final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
 //
 //                    double aa = Double.NEGATIVE_INFINITY;
 //                    double ab = Double.NEGATIVE_INFINITY;
 //                    if (k < 2*j-1)
 //                        aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
 //
 //                    if (k < 2*j)
 //                        ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
 //
 //                    double log10Max;
 //                    if (k > 1) {
 //                        final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
 //                        log10Max = approximateLog10SumLog10(aa, ab, bb);
 //                    } else {
 //                        // we know we aren't considering the BB case, so we can use an optimized log10 function
 //                        log10Max = approximateLog10SumLog10(aa, ab);
 //                    }
 //
 //                    // finally, update the L(j,k) value
 //                    kMinus0[j] = log10Max - logDenominator;
 //
 //                    String offset = Utils.dupString(' ',k);
 //                    System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
 //                }
 //            }
 //
 //            // update the posteriors vector
 //            final double log10LofK = kMinus0[jStop];
 //            log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
 //
 //            // can we abort early?
 //            lastK = k;
 //            maxLog10L = Math.max(maxLog10L, log10LofK);
 //            if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
 //                if ( DEBUG ) System.out.printf("  *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
 //                done = true;
 //            }
 //
 //            logY.rotate();
 //        }
 //
 //        return lastK;
    }
    public int linearExact(Map<String, Genotype> GLs,
                           double[] log10AlleleFrequencyPriors,
                           double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
-        final int numSamples = GLs.size();
+        final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
        final int numSamples = genotypeLikelihoods.size()-1;
        final int numChr = 2*numSamples;
        final double[][] genotypeLikelihoods = getGLs(GLs);
        final ExactACCache logY = new ExactACCache(numSamples+1);
        logY.getkMinus0()[0] = 0.0; // the zero case
@ -334,14 +181,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
            if ( k == 0 ) { // special case for k = 0
                for ( int j=1; j <= numSamples; j++ ) {
-                    kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
+                    kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
                }
            } else { // k > 0
                final double[] kMinus1 = logY.getkMinus1();
                final double[] kMinus2 = logY.getkMinus2();
                for ( int j=1; j <= numSamples; j++ ) {
-                    final double[] gl = genotypeLikelihoods[j];
+                    final double[] gl = genotypeLikelihoods.get(j);
                    final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
                    double aa = Double.NEGATIVE_INFINITY;
@ -434,10 +281,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        if ( !vc.isVariant() )
            throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
        boolean multiAllelicRecord = false;
        if (vc.getAlternateAlleles().size() > 1)
            multiAllelicRecord = true;
        Map<String, Genotype> GLs = vc.getGenotypes();
        double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
@ -454,7 +297,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        pathMetricArray[0][0] = 0.0;
        // todo = can't deal with optimal dynamic programming solution with multiallelic records
-        if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
+        if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
            sampleIndices.addAll(GLs.keySet());
            sampleIdx = GLs.size();
        }
@ -465,6 +308,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
                    continue;
                double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
                if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL)     {
                    //System.out.print(sample.getKey()+":");
                    //for (int k=0; k < likelihoods.length; k++)
                    //   System.out.format("%4.2f ",likelihoods[k]);
                    //System.out.println();
                    // all likelihoods are essentially the same: skip this sample and will later on force no call.
                    //sampleIdx++;
                    continue;
                }
                sampleIndices.add(sample.getKey());
                for (int k=0; k <= AFofMaxLikelihood; k++) {
@ -504,22 +358,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
            Genotype g = GLs.get(sample);
            if ( !g.hasLikelihoods() )
                continue;
-
+            // if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
-            if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
+            // and will add no-call genotype to GL's in a second pass
                bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
            else {
                int newIdx = tracebackArray[k][startIdx];
                bestGTguess = startIdx - newIdx;
                startIdx = newIdx;
            }
            ArrayList<Allele> myAlleles = new ArrayList<Allele>();
            double qual = Double.NEGATIVE_INFINITY;
            double[] likelihoods = g.getLikelihoods().getAsVector();
            if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
                bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
            }
            else {
                int newIdx = tracebackArray[k][startIdx];;
                bestGTguess = startIdx - newIdx;
                startIdx = newIdx;
            }
            /*           System.out.format("Sample: %s GL:",sample);
                    for (int i=0; i < likelihoods.length; i++)
-                        System.out.format("%1.4f ",likelihoods[i]);
+                        System.out.format("%1.4f, ",likelihoods[i]);
            */
            for (int i=0; i < likelihoods.length; i++) {
@ -570,83 +427,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
        }
        return calls;
    }
    // -------------------------------------------------------------------------------------
    //
    // Gold standard, but O(N^2), implementation.
    //
    // TODO -- remove me for clarity in this code
    //
    // -------------------------------------------------------------------------------------
    public int gdaN2GoldStandard(Map<String, Genotype> GLs,
                                 double[] log10AlleleFrequencyPriors,
                                 double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
        int numSamples = GLs.size();
        int numChr = 2*numSamples;
        double[][] logYMatrix = new double[1+numSamples][1+numChr];
        for (int i=0; i <=numSamples; i++)
            for (int j=0; j <=numChr; j++)
                logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
        //YMatrix[0][0] = 1.0;
        logYMatrix[0][0] = 0.0;
        int j=0;
        for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
            j++;
            if ( !sample.getValue().hasLikelihoods() )
                continue;
            Genotype g = GLs.get(sample.getKey());
-            //double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
+            double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
            double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
            //double logDenominator = Math.log10(2.0*j*(2.0*j-1));
            double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
-            // special treatment for k=0: iteration reduces to:
+            if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
-            //YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
+                continue; // regular likelihoods
            logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
-            for (int k=1; k <= 2*j; k++ ) {
+            ArrayList<Allele> myAlleles = new ArrayList<Allele>();
                //double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
                double logNumerator[];
                logNumerator = new double[3];
                if (k < 2*j-1)
                    logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
                            genotypeLikelihoods[idxAA];
                else
                    logNumerator[0] = Double.NEGATIVE_INFINITY;
                if (k < 2*j)
                    logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
                            genotypeLikelihoods[idxAB];
                else
                    logNumerator[1] = Double.NEGATIVE_INFINITY;
                if (k > 1)
                    logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
                            genotypeLikelihoods[idxBB];
                else
                    logNumerator[2] = Double.NEGATIVE_INFINITY;
                double logNum = MathUtils.softMax(logNumerator);
                //YMatrix[j][k] = num/den;
                logYMatrix[j][k] = logNum - logDenominator;
            }
            double qual = Genotype.NO_NEG_LOG_10PERROR;
            myAlleles.add(Allele.NO_CALL);
            myAlleles.add(Allele.NO_CALL);
            //System.out.println(myAlleles.toString());
            calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false));
        }
-
+        return calls;
        for (int k=0; k <= numChr; k++)
            log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
        return numChr;
    }
    private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
@ -657,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
            System.out.printf("  %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/IndelGenotypeLikelihoodsCalculationModel.java
@ -32,10 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
 import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.genotype.Haplotype;
 import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -70,9 +71,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
    // gdebug removeme
    // todo -cleanup
    private HaplotypeIndelErrorModel model;
    private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
 //
    private GenomeLoc lastSiteVisited;
    private ArrayList<Allele> alleleList;
@ -83,26 +81,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
    protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
        super(UAC, logger);
-        if (UAC.GSA_PRODUCTION_ONLY == false) {
+        pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,UAC.OUTPUT_DEBUG_INDEL_INFO);
            pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
                    UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
            useOldWrongHorribleHackedUpLikelihoodModel = false;
        }
        else {
            useOldWrongHorribleHackedUpLikelihoodModel = true;
            double INSERTION_START_PROBABILITY = 1e-3;
            double INSERTION_END_PROBABILITY = 0.5;
            double ALPHA_DELETION_PROBABILITY = 1e-3;
            model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
                    INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
        }
        pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
                    UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
        alleleList = new ArrayList<Allele>();
        getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
        minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
@ -321,7 +300,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
            haplotypeMap.clear();
            if (getAlleleListFromVCF) {
-                 for( final VariantContext vc_input : tracker.getValues(UAC.alleles) ) {
+                 for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
                      if( vc_input != null &&
                              allowableTypes.contains(vc_input.getType()) &&
                              ref.getLocus().getStart() == vc_input.getStart()) {
@ -382,20 +361,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                }
            }
        }
        int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
        int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
        int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
-        if (useOldWrongHorribleHackedUpLikelihoodModel) {
+        final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
-            numPrefBases = 20;
+        final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
-            hsize=80;
+        final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
-        }
+
        if (DEBUG)
            System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
                    (int)ref.getWindow().size(), loc.getStart(), numPrefBases);
        //System.out.println(eventLength);
-        haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
+        haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
-            ref, hsize, numPrefBases);
+                ref, hsize, numPrefBases);
        // For each sample, get genotype likelihoods based on pileup
        // compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -412,17 +388,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
                pileup = context.getBasePileup();
            if (pileup != null ) {
-                double[] genotypeLikelihoods;
+                final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
                if (useOldWrongHorribleHackedUpLikelihoodModel)
                   genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
                else
                    genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
-
+                GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
                // which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is ordered as for example
                // for 3 alleles it's 00 01 11 02 12 22
                 GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
                        alleleList,
                        genotypeLikelihoods,
                        getFilteredDepth(pileup)));
@ -444,4 +412,16 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
        return indelLikelihoodMap.get();
    }
    /**
     * Indel-specific depth count: per the original author's note, this replaces the version in
     * GenotypeLikelihoodsCalculationModel so that a deletion is considered part of the pileup and
     * the per-sample DP includes deletions covering the event.
     *
     * @param pileup the pileup whose elements are counted
     * @return the number of elements that are either deletions or carry a base accepted by
     *         {@code BaseUtils.isRegularBase}
     */
    protected int getFilteredDepth(ReadBackedPileup pileup) {
        int depth = 0;
        for ( final PileupElement element : pileup ) {
            // skip anything that is neither a deletion nor a regular base
            if ( !element.isDeletion() && !BaseUtils.isRegularBase(element.getBase()) )
                continue;
            depth++;
        }
        return depth;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java
@ -26,16 +26,14 @@
 package org.broadinstitute.sting.gatk.walkers.genotyper;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -58,25 +56,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
    }
    /**
     * Picks the record to genotype from the alleles binding.
     *
     * Walks the values returned by {@code tracker.getValues(allelesBinding)} and keeps the first one
     * that is non-null, not filtered and, when {@code requireSNP} is set, a SNP. Every further record
     * passing the same checks only triggers a warning.
     *
     * @param tracker        source of the bound records; must not be null
     * @param ref            reference context, used for the locus in the warning; must not be null
     * @param requireSNP     when true, records for which {@code isSNP()} is false are skipped
     * @param logger         receives the multiple-records warning; must not be null
     * @param allelesBinding the binding whose values are searched
     * @return the first usable record, or null when there is none
     * @throws ReviewedStingException if tracker, ref or logger is null
     */
    public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
        final boolean missingArgument = tracker == null || ref == null || logger == null;
        if ( missingArgument )
            throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);

        VariantContext firstUsable = null;
        for ( final VariantContext candidate : tracker.getValues(allelesBinding) ) {
            // unusable: missing or filtered record
            if ( candidate == null || candidate.isFiltered() )
                continue;
            // unusable: caller asked for SNPs only
            if ( requireSNP && ! candidate.isSNP() )
                continue;

            if ( firstUsable != null ) {
                logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record");
                continue;
            }
            firstUsable = candidate;
        }
        return firstUsable;
    }
    public Allele getLikelihoods(RefMetaDataTracker tracker,
                                 ReferenceContext ref,
                                 Map<String, AlignmentContext> contexts,
@ -96,7 +75,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
        if ( alternateAlleleToUse != null ) {
            bestAlternateAllele = alternateAlleleToUse.getBases()[0];
        } else if ( useAlleleFromVCF ) {
-            VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger, UAC.alleles);
+            VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
            // ignore places where we don't have a variant
            if ( vc == null )
@ -143,8 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
            aList.add(refAllele);
            aList.add(altAllele);
            double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
            // normalize in log space so that max element is zero.
            GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
-                    aList,  dlike, getFilteredDepth(pileup)));
+                    aList,  MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
        }
        return refAllele;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UGCallVariants.java
@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedArgumentCollection.java
@ -143,35 +143,21 @@ public class UnifiedArgumentCollection {
    @Hidden
    @Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
    public int INDEL_HAPLOTYPE_SIZE = 80;
-    @Hidden
+
    @Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
     public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
    //gdebug+
    // experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
-    @Hidden
+//    @Hidden
-    @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
+//    @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
-    public boolean GET_GAP_PENALTIES_FROM_DATA = false;
+//    public boolean GET_GAP_PENALTIES_FROM_DATA = false;
-
+//
-    @Hidden
+//    @Hidden
-    @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
+//    @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
-    public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
+//    public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
    @Hidden
    @Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
    public boolean OUTPUT_DEBUG_INDEL_INFO = false;
    @Hidden
    @Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false)
    public boolean dovit = false;
    @Hidden
    @Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
    public boolean GSA_PRODUCTION_ONLY = false;
    @Hidden
    @Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
    public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
    @Hidden
    @Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
    public boolean IGNORE_SNP_ALLELES = false;
@ -191,7 +177,6 @@ public class UnifiedArgumentCollection {
        uac.GLmodel = GLmodel;
        uac.AFmodel = AFmodel;
        uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
        uac.heterozygosity = heterozygosity;
        uac.PCR_error = PCR_error;
        uac.GenotypingMode = GenotypingMode;
@ -209,15 +194,10 @@ public class UnifiedArgumentCollection {
        uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
        uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
        uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
        uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
        uac.alleles = alleles;
        uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
        uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
        // todo- arguments to remove
        uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
        uac.dovit = dovit;
        uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
        uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
        return uac;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
 import org.broadinstitute.sting.utils.SampleUtils;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -127,7 +126,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
    @ArgumentCollection
    protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
    public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
-    public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
+    public RodBinding<VariantContext> getVariantRodBinding() { return null; }
    public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
    public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
    public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
@ -210,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
        if ( verboseWriter != null )
            verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
-        annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
+        annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
        UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
        // initialize the header
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
@ -36,13 +37,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.baq.BAQ;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
+import org.broadinstitute.sting.utils.variantcontext.*;
 import org.broadinstitute.sting.utils.variantcontext.Genotype;
 import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.io.PrintStream;
 import java.util.*;
@ -236,10 +235,11 @@ public class UnifiedGenotyperEngine {
    private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
        VariantContext vc;
        if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
-            VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
+            VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
            if ( vcInput == null )
                return null;
-            vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
+            vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase());
        } else {
            // deal with bad/non-standard reference bases
            if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine {
            AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
    }
    /**
     * Precomputed values of {@code MathUtils.binomialProbability(0, depth, 0.5)}, indexed by depth,
     * so that the per-sample reference-confidence loop does not recompute them for every site.
     */
    private final static double[] binomialProbabilityDepthCache = new double[10000];
    static {
        // Fill from depth 0: a slot left unwritten keeps the array default of 0.0, which would make a
        // cached lookup for a zero-depth sample differ from the direct binomialProbability(0, 0, 0.5)
        // call that this cache stands in for.
        for ( int depth = 0; depth < binomialProbabilityDepthCache.length; depth++ ) {
            binomialProbabilityDepthCache[depth] = MathUtils.binomialProbability(0, depth, 0.5);
        }
    }
    /**
     * Looks up the binomial term used by the reference-confidence estimate.
     *
     * @param depth index into the cache / trial count passed to the direct computation
     * @return the entry stored in {@code binomialProbabilityDepthCache} when {@code depth} is below
     *         the cache length, otherwise the value of
     *         {@code MathUtils.binomialProbability(0, depth, 0.5)} computed on the spot
     */
    private final double getRefBinomialProb(final int depth) {
        final boolean withinCache = depth < binomialProbabilityDepthCache.length;
        return withinCache
                ? binomialProbabilityDepthCache[depth]
                : MathUtils.binomialProbability(0, depth, 0.5);
    }
    private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
        if ( contexts == null )
            return null;
@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine {
                    depth = context.getExtendedEventPileup().size();
            }
-            P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
+            P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
        }
        return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
@ -635,7 +650,7 @@ public class UnifiedGenotyperEngine {
            // no extended event pileup
            // if we're genotyping given alleles and we have a requested SNP at this position, do SNP
            if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
-                VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger, UAC.alleles);
+                VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
                if (vcInput == null)
                    return null;
@ -741,4 +756,23 @@ public class UnifiedGenotyperEngine {
        return afcm;
    }
    /**
     * Picks the record to genotype from the alleles binding at a given location.
     *
     * Walks the values returned by {@code tracker.getValues(allelesBinding, loc)} and keeps the first
     * one that is non-null, not filtered and, when {@code requireSNP} is set, a SNP. Every further
     * record passing the same checks only triggers a warning.
     *
     * @param tracker        source of the bound records; must not be null
     * @param ref            reference context, used for the locus in the warning; must not be null
     * @param loc            location handed to the tracker lookup (not null-checked here)
     * @param requireSNP     when true, records for which {@code isSNP()} is false are skipped
     * @param logger         receives the multiple-records warning; must not be null
     * @param allelesBinding the binding whose values are searched
     * @return the first usable record, or null when there is none
     * @throws ReviewedStingException if tracker, ref or logger is null
     */
    public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
        final boolean missingArgument = tracker == null || ref == null || logger == null;
        if ( missingArgument )
            throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);

        VariantContext firstUsable = null;
        for ( final VariantContext candidate : tracker.getValues(allelesBinding, loc) ) {
            // unusable: missing or filtered record
            if ( candidate == null || candidate.isFiltered() )
                continue;
            // unusable: caller asked for SNPs only
            if ( requireSNP && ! candidate.isSNP() )
                continue;

            if ( firstUsable != null ) {
                logger.warn("Multiple valid VCF records detected in the alleles input file at site " + ref.getLocus() + ", only considering the first record");
                continue;
            }
            firstUsable = candidate;
        }
        return firstUsable;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/HaplotypeIndelErrorModel.java
@ -26,9 +26,9 @@
 package org.broadinstitute.sting.gatk.walkers.indels;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.genotype.Haplotype;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
        baseMatchArray = new double[MAX_CACHED_QUAL+1];
        baseMismatchArray = new double[MAX_CACHED_QUAL+1];
        for (int k=1; k <= MAX_CACHED_QUAL; k++) {
-            double baseProb = QualityUtils.qualToProb(k);
+            double baseProb = QualityUtils.qualToProb((byte)k);
            baseMatchArray[k] =  probToQual(baseProb);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@ -28,9 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.indels;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.genotype.Haplotype;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -50,36 +51,8 @@ import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Reca
 public class PairHMMIndelErrorModel {
    public static final int BASE_QUAL_THRESHOLD = 20;
    private static final int MATCH_OFFSET = 0;
    private static final int X_OFFSET = 1;
    private static final int Y_OFFSET = 2;
    private static final int DIAG = 0;
    private static final int UP = 1;
    private static final int LEFT = 2;
    private static final int DIAG_GOTO_M = 0;
    private static final int DIAG_GOTO_X = 1;
    private static final int DIAG_GOTO_Y = 2;
    private static final int UP_GOTO_M = 4;
    private static final int UP_GOTO_X = 5;
    private static final int UP_GOTO_Y = 6;
    private static final int LEFT_GOTO_M = 8;
    private static final int LEFT_GOTO_X = 9;
    private static final int LEFT_GOTO_Y = 10;
    private static final int[] ACTIONS_M = {DIAG_GOTO_M, DIAG_GOTO_X, DIAG_GOTO_Y};
    private static final int[] ACTIONS_X = {UP_GOTO_M, UP_GOTO_X, UP_GOTO_Y};
    private static final int[] ACTIONS_Y = {LEFT_GOTO_M, LEFT_GOTO_X, LEFT_GOTO_Y};
    private final double logGapOpenProbability;
    private final double logGapContinuationProbability;
@ -100,36 +73,13 @@ public class PairHMMIndelErrorModel {
    private static final double MIN_GAP_CONT_PENALTY = 10.0;
    private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
    private boolean doViterbi = false;
    private final boolean useAffineGapModel = true;
    private boolean doContextDependentPenalties = false;
    private final double[] GAP_OPEN_PROB_TABLE;
    private final double[] GAP_CONT_PROB_TABLE;
    private boolean getGapPenaltiesFromFile = false;
    private int SMOOTHING = 1;
    private int MAX_QUALITY_SCORE = 50;
    private int PRESERVE_QSCORES_LESS_THAN = 5;
    /////////////////////////////
    // Private Member Variables
    /////////////////////////////
-//copy+
+
 /*    private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
    private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
    private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
    private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
    private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
    protected static final String EOF_MARKER = "EOF";
    private long numReadsWithMalformedColorSpace = 0;
    private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
    private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
  */
 //copy-
    static {
        LOG_ONE_HALF= -Math.log10(2.0);
        END_GAP_COST = LOG_ONE_HALF;
@ -145,141 +95,9 @@ public class PairHMMIndelErrorModel {
        }
    }
-    public  PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit,boolean gpf, File RECAL_FILE) {
+    public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) {
        this(indelGOP, indelGCP, deb, doCDP, dovit);
        this.getGapPenaltiesFromFile = gpf;
        // read data from recal file
        // gdebug - start copy from TableRecalibrationWalker
 /*        if (gpf) {
            boolean sawEOF = false;
            boolean REQUIRE_EOF = false;
            int lineNumber = 0;
            boolean foundAllCovariates = false;
            // Get a list of all available covariates
            final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
            try {
                for ( String line : new XReadLines(RECAL_FILE) ) {
                    lineNumber++;
                    if ( EOF_MARKER.equals(line) ) {
                        sawEOF = true;
                    } else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() )  {
                        ; // Skip over the comment lines, (which start with '#')
                    }
                    // Read in the covariates that were used from the input file
                    else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
                        if( foundAllCovariates ) {
                            throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
                        } else { // Found the covariate list in input file, loop through all of them and instantiate them
                            String[] vals = line.split(",");
                            for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
                                boolean foundClass = false;
                                for( Class<?> covClass : classes ) {
                                    if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
                                        foundClass = true;
                                        try {
                                            Covariate covariate = (Covariate)covClass.newInstance();
                                            requestedCovariates.add( covariate );
                                        } catch (Exception e) {
                                            throw new DynamicClassResolutionException(covClass, e);
                                        }
                                    }
                                }
                                if( !foundClass ) {
                                    throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
                                }
                            }
                        }
                    } else { // Found a line of data
                        if( !foundAllCovariates ) {
                            foundAllCovariates = true;
                            // At this point all the covariates should have been found and initialized
                            if( requestedCovariates.size() < 2 ) {
                                throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
                            }
                            final boolean createCollapsedTables = true;
                            // Initialize any covariate member variables using the shared argument collection
                            for( Covariate cov : requestedCovariates ) {
                                cov.initialize( RAC );
                            }
                            // Initialize the data hashMaps
                            dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
                        }
                        addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
                    }
                }
            } catch ( FileNotFoundException e ) {
                throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
            } catch ( NumberFormatException e ) {
                throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
            }
            if ( !sawEOF ) {
                final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
                if ( REQUIRE_EOF )
                    throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
            }
            if( dataManager == null ) {
                throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
            }
            // Create the tables of empirical quality scores that will be used in the sequential calculation
            dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
        }
        // debug end copy
  */
    }
    /**
     * For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
     */
 /*
    private void addCSVData(final File file, final String line) {
        final String[] vals = line.split(",");
        // Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
        if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
            throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
                    " --Perhaps the read group string contains a comma and isn't being parsed correctly.");
        }
        final Object[] key = new Object[requestedCovariates.size()];
        Covariate cov;
        int iii;
        for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
            cov = requestedCovariates.get( iii );
            key[iii] = cov.getValue( vals[iii] );
        }
        // Create a new datum using the number of observations, number of mismatches, and reported quality score
        final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
        // Add that datum to all the collapsed tables which will be used in the sequential calculation
        dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
    }
 */
    /**
     * Builds the model through the four-argument constructor and then records the Viterbi switch.
     *
     * @param indelGOP forwarded unchanged to the four-argument constructor
     * @param indelGCP forwarded unchanged to the four-argument constructor
     * @param deb      forwarded unchanged to the four-argument constructor
     * @param doCDP    forwarded unchanged to the four-argument constructor
     * @param dovit    value stored in {@code doViterbi}
     */
    public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
        this(indelGOP, indelGCP, deb, doCDP);
        doViterbi = dovit;
    }
    public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
        this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
        this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
        this.doContextDependentPenalties = doCDP;
        this.DEBUG = deb;
@ -313,132 +131,6 @@ public class PairHMMIndelErrorModel {
    }
    /**
     * Scores a read against a haplotype with a simple (non-affine) dynamic-programming alignment.
     *
     * Rows of the score table index read positions and columns index haplotype positions. Every
     * gap step costs {@code logGapOpenProbability}; a diagonal step costs the cached match or
     * mismatch term for the read base quality. When {@code doViterbi} is set each cell keeps the
     * best of its three candidates (and remembers which one, for the debug traceback); otherwise
     * the candidates are combined with {@code MathUtils.softMax}.
     *
     * @param haplotypeBases bases of the candidate haplotype
     * @param readBases      bases of the read
     * @param readQuals      base qualities of the read, indexed in parallel with {@code readBases}
     * @return the largest score found in the last column or last row of the table, never less
     *         than the 0.0 the search starts from
     */
    private double computeReadLikelihoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) {
        final int numRows = readBases.length + 1;
        final int numCols = haplotypeBases.length + 1;
        final int lastRow = numRows - 1;
        final int lastCol = numCols - 1;

        final double[][] scores = new double[numRows][numCols];
        final int[][] moves = new int[numRows][numCols];

        // The first row and column keep the 0.0 score a freshly allocated array already holds,
        // so only the traceback directions of the boundary cells need to be written.
        for (int row = 1; row < numRows; row++) {
            moves[row][0] = UP;
        }
        for (int col = 1; col < numCols; col++) {
            moves[0][col] = LEFT;
        }

        // Fill the interior of the table.
        for (int row = 1; row < numRows; row++) {
            for (int col = 1; col < numCols; col++) {
                final byte readBase = readBases[row - 1];
                final byte hapBase = haplotypeBases[col - 1];

                // Clamp the quality into the range covered by the cached match/mismatch tables;
                // the lower bound works around reads whose base quality is 0.
                byte qual = readQuals[row - 1];
                if (qual < 1) {
                    qual = 1;
                }
                if (qual > MAX_CACHED_QUAL) {
                    qual = MAX_CACHED_QUAL;
                }

                final double emission;
                if (readBase == hapBase) {
                    emission = baseMatchArray[(int) qual];
                } else {
                    emission = baseMismatchArray[(int) qual];
                }

                final double[] candidates = new double[3];
                candidates[DIAG] = scores[row - 1][col - 1] + emission;
                candidates[UP] = scores[row - 1][col] + logGapOpenProbability;
                candidates[LEFT] = scores[row][col - 1] + logGapOpenProbability;

                // The recorded move stays 0 when not running in Viterbi mode.
                int chosenMove = 0;
                final double cellScore;
                if (doViterbi) {
                    chosenMove = MathUtils.maxElementIndex(candidates);
                    cellScore = candidates[chosenMove];
                } else {
                    cellScore = MathUtils.softMax(candidates);
                }

                scores[row][col] = cellScore;
                moves[row][col] = chosenMove;
            }
        }

        // Pick the end cell: scan the last column first (strictly greater wins), then the last
        // row (greater-or-equal wins, so ties go to the last row).
        double best = 0.0;
        int endRow = lastRow;
        int endCol = lastCol;
        for (int row = 0; row <= lastRow; row++) {
            final double candidate = scores[row][lastCol];
            if (candidate > best) {
                best = candidate;
                endRow = row;
                endCol = lastCol;
            }
        }
        for (int col = 0; col <= lastCol; col++) {
            final double candidate = scores[lastRow][col];
            if (candidate >= best) {
                best = candidate;
                endRow = lastRow;
                endCol = col;
            }
        }

        // The traceback is only used to print the alignment when debugging in Viterbi mode.
        if (DEBUG && doViterbi) {
            final StringBuilder hapAligned = new StringBuilder(new String(haplotypeBases));
            final StringBuilder readAligned = new StringBuilder(new String(readBases));
            int row = endRow;
            int col = endCol;

            System.out.println("Simple NW");
            while (row > 0 || col > 0) {
                final int move = moves[row][col];
                System.out.print(move);
                if (move == UP) {
                    // read base has no partner: put a gap into the haplotype
                    hapAligned.insert(col, "-");
                    row--;
                } else if (move == LEFT) {
                    // haplotype base has no partner: put a gap into the read
                    readAligned.insert(row, "-");
                    col--;
                } else {
                    row--;
                    col--;
                }
            }

            System.out.println("\nAlignment: ");
            System.out.println("R:" + readAligned);
            System.out.println("H:" + hapAligned);
            System.out.println();
        }

        if (DEBUG) {
            System.out.format("Likelihood: %5.4f\n", best);
        }

        return best;
    }
    static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
        // compute forward hrun length, example:
        // AGGTGACCCCCCTGAGAG
@ -479,14 +171,10 @@ public class PairHMMIndelErrorModel {
        final int Y_METRIC_LENGTH = haplotypeBases.length+1;
        // initialize path metric and traceback memories for likelihood computation
-        double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
-        double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
+        final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        int[][] bestActionArrayM = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        int[][] bestActionArrayX = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        int[][] bestActionArrayY = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
        double c,d;
        matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
        for (int i=1; i < X_METRIC_LENGTH; i++) {
@ -494,8 +182,6 @@ public class PairHMMIndelErrorModel {
            matchMetricArray[i][0]  = Double.NEGATIVE_INFINITY;
            YMetricArray[i][0]      = Double.NEGATIVE_INFINITY;
            XMetricArray[i][0]      = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
            bestActionArrayX[i][0] = bestActionArrayY[i][0] = bestActionArrayM[i][0] = UP_GOTO_X;
        }
        for (int j=1; j < Y_METRIC_LENGTH; j++) {
@ -503,188 +189,46 @@ public class PairHMMIndelErrorModel {
            matchMetricArray[0][j]  = Double.NEGATIVE_INFINITY;
            XMetricArray[0][j]      = Double.NEGATIVE_INFINITY;
            YMetricArray[0][j]      = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
            bestActionArrayY[0][j] = bestActionArrayM[0][j] = bestActionArrayX[0][j] = LEFT_GOTO_Y;
        }
        for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
-            int im1 = indI-1;
+            final int im1 = indI-1;
            for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
-                int jm1 = indJ-1;
+                final int jm1 = indJ-1;
-                byte x = readBases[im1];
+                final byte x = readBases[im1];
-                byte y = haplotypeBases[jm1];
+                final byte y = haplotypeBases[jm1];
-                byte qual = readQuals[im1];
+                final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
-
+                final double pBaseRead =  (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
                double bestMetric = 0.0;
                int bestMetricIdx = 0;
                // compute metric for match/mismatch
                // workaround for reads whose bases quality = 0,
                if (qual < 1)
                    qual = 1;
                if (qual > MAX_CACHED_QUAL)
                    qual = MAX_CACHED_QUAL;
                double pBaseRead =  (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
                double[] metrics = new double[3];
                if (doViterbi) {
                    // update match array
                    metrics[MATCH_OFFSET] = matchMetricArray[im1][jm1] + pBaseRead;
                    metrics[X_OFFSET] = XMetricArray[im1][jm1] + pBaseRead;
                    metrics[Y_OFFSET] = YMetricArray[im1][jm1] + pBaseRead;
                    bestMetricIdx = MathUtils.maxElementIndex(metrics);
                    bestMetric = metrics[bestMetricIdx];
                }
                else
                    bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
                            YMetricArray[im1][jm1] + pBaseRead);
                double bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead,
                                                      XMetricArray[im1][jm1] + pBaseRead,
                                                      YMetricArray[im1][jm1] + pBaseRead);
                matchMetricArray[indI][indJ] = bestMetric;
                bestActionArrayM[indI][indJ] = ACTIONS_M[bestMetricIdx];
                // update X array
                // State X(i,j): X(1:i) aligned to a gap in Y(1:j).
                // When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
-                //c = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
+                final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-                //d = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
+                final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
-                if (getGapPenaltiesFromFile) {
+                bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
                    c = currentGOP[im1];
                    d = logGapContinuationProbability;
                } else {
                    c = currentGOP[jm1];
                    d = currentGCP[jm1];
                }
                if (indJ == Y_METRIC_LENGTH-1)
                    c = d = END_GAP_COST;
                if (doViterbi) {
                    metrics[MATCH_OFFSET] = matchMetricArray[im1][indJ] + c;
                    metrics[X_OFFSET] = XMetricArray[im1][indJ] + d;
                    metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability;
                    bestMetricIdx = MathUtils.maxElementIndex(metrics);
                    bestMetric = metrics[bestMetricIdx];
                }
                else
                    bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c, XMetricArray[im1][indJ] + d);
                XMetricArray[indI][indJ] = bestMetric;
                bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx];
                // update Y array
                //c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
                //d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
-                if (getGapPenaltiesFromFile) {
+                final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
-                    c = currentGOP[im1];
+                final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
-                    d = logGapContinuationProbability;
+                bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
                }
                else {
                    c = currentGOP[jm1];
                    d = currentGCP[jm1];                        
                }
                if (indI == X_METRIC_LENGTH-1)
                    c = d = END_GAP_COST;
                if (doViterbi) {
                    metrics[MATCH_OFFSET] = matchMetricArray[indI][jm1] + c;
                    metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability;
                    metrics[Y_OFFSET] = YMetricArray[indI][jm1] + d;
                    bestMetricIdx = MathUtils.maxElementIndex(metrics);
                    bestMetric = metrics[bestMetricIdx];
                }
                else
                    bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c, YMetricArray[indI][jm1] + d);
                YMetricArray[indI][indJ] = bestMetric;
                bestActionArrayY[indI][indJ] = ACTIONS_Y[bestMetricIdx];
            }
        }
-        double bestMetric;
+        final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
-        double metrics[] = new double[3];
+        final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
-        int bestTable=0, bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
+                                                    XMetricArray[bestI][bestJ],
-        metrics[MATCH_OFFSET] = matchMetricArray[bestI][bestJ];
+                                                    YMetricArray[bestI][bestJ]);
        metrics[X_OFFSET] = XMetricArray[bestI][bestJ];
        metrics[Y_OFFSET] = YMetricArray[bestI][bestJ];
        if (doViterbi) {
            bestTable = MathUtils.maxElementIndex(metrics);
            bestMetric = metrics[bestTable];
        }
        else
            bestMetric = MathUtils.softMax(metrics);
        // Do traceback (needed only for debugging!)
        if (DEBUG && doViterbi) {
            int bestAction;
            int i = bestI;
            int j = bestJ;
            System.out.println("Affine gap NW");
            String haplotypeString = new String (haplotypeBases);
            String readString = new String(readBases);
            while (i >0 || j >0) {
                if (bestTable == X_OFFSET) {
                    // insert gap in Y
                    haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
                    bestAction = bestActionArrayX[i][j];
                }
                else if (bestTable == Y_OFFSET) {
                    readString = readString.substring(0,i)+"-"+readString.substring(i);
                    bestAction = bestActionArrayY[i][j];
                }
                else {
                    bestAction = bestActionArrayM[i][j];
                }
                System.out.print(bestAction);
                // bestAction contains action to take at next step
                // encoding of bestAction: upper 2 bits = direction, lower 2 bits = next table
                // bestTable and nextDirection for next step
                bestTable = bestAction & 0x3;
                int nextDirection = bestAction >> 2;
                if (nextDirection == UP) {
                    i--;
                } else if (nextDirection == LEFT) {
                    j--;
                } else { //  if (nextDirection == DIAG)
                    i--; j--;
                }
            }
            System.out.println("\nAlignment: ");
            System.out.println("R:"+readString);
            System.out.println("H:"+haplotypeString);
            System.out.println();
        }
        if (DEBUG)
            System.out.format("Likelihood: %5.4f\n", bestMetric);
@ -707,12 +251,12 @@ public class PairHMMIndelErrorModel {
        }
    }
    public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
-                                                                   ReferenceContext ref, int eventLength,
+                                                                 ReferenceContext ref, int eventLength,
-                                                                   HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
+                                                                 HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
        int numHaplotypes = haplotypeMap.size();
-        double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
+        final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
-        double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes];
+        final int readCounts[] = new int[pileup.size()];
        int readIdx=0;
        LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
@ -723,34 +267,35 @@ public class PairHMMIndelErrorModel {
            System.out.println(new String(ref.getBases()));
        }
-        if (doContextDependentPenalties && !getGapPenaltiesFromFile)   {
+        // will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
-            // will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
+        // todo -- refactor into separate function
-
+        for (Allele a: haplotypeMap.keySet()) {
-
+            Haplotype haplotype = haplotypeMap.get(a);
-            for (Allele a: haplotypeMap.keySet()) {
+            byte[] haplotypeBases = haplotype.getBasesAsBytes();
-                Haplotype haplotype = haplotypeMap.get(a);
+            double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
-                byte[] haplotypeBases = haplotype.getBasesAsBytes();
+            double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
                double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
                double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
                // get homopolymer length profile for current haplotype
                int[] hrunProfile = new int[haplotypeBases.length];
                getContextHomopolymerLength(haplotypeBases,hrunProfile);
                if (DEBUG) {
                    System.out.println("Haplotype bases:");
                    System.out.println(new String(haplotypeBases));
                    for (int i=0; i < hrunProfile.length; i++)
                        System.out.format("%d",hrunProfile[i]);
                    System.out.println();
                }
                fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
                gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
                gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
            // get homopolymer length profile for current haplotype
            int[] hrunProfile = new int[haplotypeBases.length];
            getContextHomopolymerLength(haplotypeBases,hrunProfile);
            if (DEBUG) {
                System.out.println("Haplotype bases:");
                System.out.println(new String(haplotypeBases));
                for (int i=0; i < hrunProfile.length; i++)
                    System.out.format("%d",hrunProfile[i]);
                System.out.println();
            }
            fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
            gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
            gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
        }
        for (PileupElement p: pileup) {
            // > 1 when the read is a consensus read representing multiple independent observations
            final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
            readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
            // check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
            if (indelLikelihoodMap.containsKey(p)) {
@ -762,61 +307,20 @@ public class PairHMMIndelErrorModel {
            }
            else {
                //System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
-                GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
+                SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
                if (read == null)
                    continue;
-                if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) {
+                if ( isReduced ) {
                    read = ReadUtils.reducedReadWithReducedQuals(read);
                }
                if(ReadUtils.is454Read(read)) {
                    continue;
                }
                double[] recalQuals = null;
 /*
                if (getGapPenaltiesFromFile) {
                    RecalDataManager.parseSAMRecord( read, RAC );
                    recalQuals = new double[read.getReadLength()];
                    //compute all covariate values for this read
                    final Comparable[][] covariateValues_offset_x_covar =
                            RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
                    // For each base in the read
                    for( int offset = 0; offset < read.getReadLength(); offset++ ) {
                        final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
                        Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
                        if(qualityScore == null)
                        {
                            qualityScore = performSequentialQualityCalculation( fullCovariateKey );
                            qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
                        }
                        recalQuals[offset] = -((double)qualityScore)/10.0;
                    }
                    // for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
                    // = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
                    if (DEBUG)  {
                        System.out.format("\n\nStarting read:%s S:%d US:%d E:%d UE:%d C:%s\n",read.getReadName(),
                                read.getAlignmentStart(),
                                read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
                                read.getCigarString());
                        byte[] bases = read.getReadBases();
                        for (int k = 0; k < recalQuals.length; k++) {
                            System.out.format("%c",bases[k]);
                        }
                        System.out.println();
                        for (int k = 0; k < recalQuals.length; k++) {
                            System.out.format("%.0f ",recalQuals[k]);
                        }
                        System.out.println();
                    }
                }        */
                // get bases of candidate haplotypes that overlap with reads
                final int trailingBases = 3;
@ -937,11 +441,6 @@ public class PairHMMIndelErrorModel {
                            unclippedReadBases.length-numEndClippedBases);
                    double[] recalCDP = null;
                    if (getGapPenaltiesFromFile) {
                        recalCDP = Arrays.copyOfRange(recalQuals,numStartClippedBases,
                                unclippedReadBases.length-numEndClippedBases);
                    }
                    if (DEBUG) {
                        System.out.println("Read bases:");
@ -971,27 +470,9 @@ public class PairHMMIndelErrorModel {
                            System.out.println(new String(haplotypeBases));
                        }
-                        Double readLikelihood = 0.0;
+                        final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
-                        if (useAffineGapModel) {
+                        final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
-
+                        final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
                            double[] currentContextGOP = null;
                            double[] currentContextGCP = null;
                            if (doContextDependentPenalties) {
                               if (getGapPenaltiesFromFile) {
                                   readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
                               }  else {
                                   currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
                                   currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
                                   readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
                               }
                            }
                        }
                        else
                            readLikelihood = computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
                        readEl.put(a,readLikelihood);
                        readLikelihoods[readIdx][j++] = readLikelihood;
@ -1004,7 +485,7 @@ public class PairHMMIndelErrorModel {
        if (DEBUG) {
            System.out.println("\nLikelihood summary");
-            for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
+            for (readIdx=0; readIdx < pileup.size(); readIdx++) {
                System.out.format("Read Index: %d ",readIdx);
                for (int i=0; i < readLikelihoods[readIdx].length; i++)
                    System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
@ -1012,123 +493,41 @@ public class PairHMMIndelErrorModel {
            }
        }
        return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
    }
    private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
        final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
        // todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplified to just a single loop without the intermediate NxN matrix
        for (int i=0; i < numHaplotypes; i++) {
            for (int j=i; j < numHaplotypes; j++){
                // combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
                // L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
                //readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
-                 for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
+                for (int readIdx = 0; readIdx < readLikelihoods.length; readIdx++) {
                    // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
                    // First term is approximated by Jacobian log with table lookup.
                    if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
                        continue;
-                    haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
+                    final double li = readLikelihoods[readIdx][i];
-                            readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
+                    final double lj = readLikelihoods[readIdx][j];
-
+                    final int readCount = readCounts[readIdx];
                    haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
                }
            }
        }
-        return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
+        final double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
    }
    public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
        int hSize = haplotypeLikehoodMatrix.length;
        double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
        int k=0;
-        double maxElement = Double.NEGATIVE_INFINITY;
+        for (int j=0; j < numHaplotypes; j++) {
        for (int j=0; j < hSize; j++) {
            for (int i=0; i <= j; i++){
                genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
                if (haplotypeLikehoodMatrix[i][j] > maxElement)
                    maxElement = haplotypeLikehoodMatrix[i][j];
            }
        }
-        // renormalize
+        // renormalize   so that max element is zero.
-        for (int i=0; i < genotypeLikelihoods.length; i++)
+        return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
            genotypeLikelihoods[i] -= maxElement;
        return genotypeLikelihoods;
    }
    /**
     * Implements a serial recalibration of the reads using the combinational table.
     * First, we perform a positional recalibration, and then a subsequent dinuc correction.
     *
     * Given the full recalibration table, we perform the following preprocessing steps:
     *
     *   - calculate the global quality score shift across all data [DeltaQ]
     *   - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
     *      -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
     *   - The final shift equation is:
     *
     *      Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
     * @param key The list of Comparables that were calculated from the covariates
     * @return A recalibrated quality score as a byte
     */
 /*
    private byte performSequentialQualityCalculation( final Object... key ) {
        final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
        final Object[] readGroupCollapsedKey = new Object[1];
        final Object[] qualityScoreCollapsedKey = new Object[2];
        final Object[] covariateCollapsedKey = new Object[3];
        // The global quality shift (over the read group only)
        readGroupCollapsedKey[0] = key[0];
        final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
        double globalDeltaQ = 0.0;
        if( globalRecalDatum != null ) {
            final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
            final double aggregrateQReported = globalRecalDatum.getEstimatedQReported();
            globalDeltaQ = globalDeltaQEmpirical - aggregrateQReported;
        }
        // The shift in quality between reported and empirical
        qualityScoreCollapsedKey[0] = key[0];
        qualityScoreCollapsedKey[1] = key[1];
        final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
        double deltaQReported = 0.0;
        if( qReportedRecalDatum != null ) {
            final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
            deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
        }
        // The shift in quality due to each covariate by itself in turn
        double deltaQCovariates = 0.0;
        double deltaQCovariateEmpirical;
        covariateCollapsedKey[0] = key[0];
        covariateCollapsedKey[1] = key[1];
        for( int iii = 2; iii < key.length; iii++ ) {
            covariateCollapsedKey[2] =  key[iii]; // The given covariate
            final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
            if( covariateRecalDatum != null ) {
                deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
                deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
            }
        }
        final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
        return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
        // Verbose printouts used to validate with old recalibrator
        //if(key.contains(null)) {
        //    System.out.println( key  + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
        //                 qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
        //}
        //else {
        //    System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
        //                 key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
        //}
        //return newQualityByte;
    }
 */
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/SomaticIndelDetectorWalker.java
@ -68,26 +68,59 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.io.*;
 import java.util.*;
 /**
 * Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
 * but this latter functionality is now superseded by UnifiedGenotyper.
 *
 * <p>
 * This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferably MSA cleaned) sequencing
- * data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
+ * data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
- * and mismtach statistics around the calls (tuned on with --verbose). The calls can be performed from a single/pooled sample,
+ * include additional statistics such as mismatches and base qualities around the calls, read strandness (how many
- * or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
+ * forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
- * the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
+ * statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
- * if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
+ * attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
- * if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
+ * metrics for the post-processing tools to make the final decision). The calls are performed by default
- * only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
+ * from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
 * command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
 * respectively. Indels are called from the tumor sample and annotated as germline
 * if even a weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
 * if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
 * is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
 * in tumor sample, it will not be reported.
 *
- * <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
+ * To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
- * please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
+ * bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
 * on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
 *
 * <h2>Input</h2>
 * <p>
 * Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
 * </p>
 *
 * <h2>Output</h2>
 * <p>
 * Indel calls with associated metrics.
 * </p>
 *
 * <h2>Examples</h2>
 * <pre>
 * java -Xmx2g -jar GenomeAnalysisTK.jar \
 *   -R ref.fasta \
 *   -T SomaticIndelDetector \
 *   -o indels.vcf \
 *   -verbose indels.txt \
 *   -I:normal normal.bam \
 *   -I:tumor tumor.bam
 * </pre>
 *
 */
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
 public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
 //    @Output
 //    PrintStream out;
-    @Output(doc="File to which variants should be written",required=true)
+    @Output(doc="File to write variants (indels) in VCF format",required=true)
    protected VCFWriter vcf_writer = null;
    @Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
@ -102,68 +135,80 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
    @Hidden
    @Argument(fullName = "genotype_intervals", shortName = "genotype",
-            doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
+        doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
    public String genotypeIntervalsFile = null;
    @Hidden
    @Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
-            doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
+        doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
-                "if the list turns out to be unsorted, it will throw an exception.  "+
+            "if the list turns out to be unsorted, it will throw an exception.  "+
-                "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
+            "Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
-                "to sort and keep it in memory (increases memory usage!).")
+            "to sort and keep it in memory (increases memory usage!).")
    protected boolean GENOTYPE_NOT_SORTED = false;
    @Hidden
-	@Argument(fullName="unpaired", shortName="unpaired",
+    @Argument(fullName="unpaired", shortName="unpaired",
-			doc="Perform unpaired calls (no somatic status detection)", required=false)
+                    doc="Perform unpaired calls (no somatic status detection)", required=false)
    boolean call_unpaired = false;
-	boolean call_somatic ;
+    boolean call_somatic ;
-	@Argument(fullName="verboseOutput", shortName="verbose",
+    @Argument(fullName="verboseOutput", shortName="verbose",
-			doc="Verbose output file in text format", required=false)
+                    doc="Verbose output file in text format", required=false)
-	java.io.File verboseOutput = null;
+    java.io.File verboseOutput = null;
    @Argument(fullName="bedOutput", shortName="bed",
-            doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
+        doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
    java.io.File bedOutput = null;
-	@Argument(fullName="minCoverage", shortName="minCoverage",
+    @Argument(fullName="minCoverage", shortName="minCoverage",
-			doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
+                    doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
-	int minCoverage = 6;
+            "with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
    int minCoverage = 6;
-	@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
+    @Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
-			doc="used only with --somatic;  normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
+                    doc="used only in default (somatic) mode;  normal sample must have at least minNormalCoverage "+
-	int minNormalCoverage = 4;
+            "or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
    int minNormalCoverage = 4;
-	@Argument(fullName="minFraction", shortName="minFraction",
+    @Argument(fullName="minFraction", shortName="minFraction",
-			doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
+                    doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
-			" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
+                    " (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
-	double minFraction = 0.3;
+    double minFraction = 0.3;
-	@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
+    @Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
-			doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
+                    doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
-	double minConsensusFraction = 0.7;
+            "all indel observations at the site exceeds this threshold", required=false)
    double minConsensusFraction = 0.7;
-	@Argument(fullName="minIndelCount", shortName="minCnt",
+    @Argument(fullName="minIndelCount", shortName="minCnt",
-			doc="Minimum count of reads supporting consensus indel required for making the call. "+
+                    doc="Minimum count of reads supporting consensus indel required for making the call. "+
-			" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
+                    " This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
-			"(minIndelCount not met) will not pass.", required=false)
+                    "(minIndelCount not met) will not pass.", required=false)
-	int minIndelCount = 0;
+    int minIndelCount = 0;
-	@Argument(fullName="refseq", shortName="refseq",
+    @Argument(fullName="refseq", shortName="refseq",
-			doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
+                    doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with "+
-	String RefseqFileName = null;
+            "GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
    String RefseqFileName = null;
-    @Argument(fullName="blacklistedLanes", shortName="BL",
+//@Argument(fullName="blacklistedLanes", shortName="BL",
-            doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
+//        doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
-                    "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
+//                "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
-    PlatformUnitFilterHelper dummy;
+//PlatformUnitFilterHelper dummy;
-     @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
+
    @Hidden
    @Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
            required=false) Boolean DEBUG = false;
    @Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
-            "May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
+            "May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
            "window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
            "than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
            "if window is too small",required=false) int WINDOW_SIZE = 200;
    @Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
                " the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
 	private WindowContext tumor_context;
 	private WindowContext normal_context; 
 	private int currentContigIndex = -1;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhasingRead.java
@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
    public PhasingRead(int length, int mappingQual) {
        super(length);
-        this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
+        this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
        this.baseProbs = new PreciseNonNegativeDouble[length];
        Arrays.fill(this.baseProbs, null);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/RefSeqDataParser.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/RefSeqDataParser.java
@ -44,12 +44,12 @@ public class RefSeqDataParser {
        String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
        Map<String, String> entriesToNames = new HashMap<String, String>();
-        Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
+        int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
-        if (numRecords != null) {
+        if (numRecords != -1) {
            boolean done = false;
            if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
-                String name = vc.getAttributeAsStringNoException(nameKeyToUse);
+                String name = vc.getAttributeAsString(nameKeyToUse, null);
                if (name != null) {
                    entriesToNames.put(nameKeyToUse, name);
                    done = true;
@ -59,14 +59,14 @@ public class RefSeqDataParser {
            if (!done) {
                for (int i = 1; i <= numRecords; i++) {
                    String key = nameKeyToUseMultiplePrefix + i;
-                    String name = vc.getAttributeAsStringNoException(key);
+                    String name = vc.getAttributeAsString(key, null);
                    if (name != null)
                        entriesToNames.put(key, name);
                }
            }
        }
        else { // no entry with the # of records:
-            String name = vc.getAttributeAsStringNoException(nameKeyToUse);
+            String name = vc.getAttributeAsString(nameKeyToUse, null);
            if (name != null) {
                entriesToNames.put(nameKeyToUse, name);
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/DocumentationTest.java
@ -42,6 +42,7 @@ import java.util.*;
 *
 * <p>Body test</p>
 */
@Hidden
 public class DocumentationTest extends RodWalker<Integer, Integer> {
    // the docs for the arguments are in the collection
    @ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/recalibration/CountCovariatesWalker.java
@ -76,6 +76,42 @@ import java.util.Map;
 * <h2>Output</h2>
 * <p>
 * A recalibration table file in CSV format that is used by the TableRecalibration walker.
 * It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.  
 *
 * The first 20 lines of such a file are shown below.  
 * * The file begins with a series of comment lines describing:
 * ** The number of counted loci
 * ** The number of counted bases
 * ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
 * 
 * * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.  
 *
 * * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
 * depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of 
 * reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
 * 
 * <pre>
 * # Counted Sites    19451059
 * # Counted Bases    56582018
 * # Skipped Sites    82666
 * # Fraction Skipped 1 / 235 bp
 * ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
 * SRR006446,11,65,CA,9,1,10
 * SRR006446,11,48,TA,10,0,40
 * SRR006446,11,67,AA,27,0,40
 * SRR006446,11,61,GA,11,1,10
 * SRR006446,12,34,CA,47,1,17
 * SRR006446,12,30,GA,52,1,17
 * SRR006446,12,36,AA,352,1,25
 * SRR006446,12,17,TA,182,11,12
 * SRR006446,11,48,TG,2,0,40
 * SRR006446,11,67,AG,1,0,40
 * SRR006446,12,34,CG,9,0,40
 * SRR006446,12,30,GG,43,0,40
 * ERR001876,4,31,AG,1,0,40
 * ERR001876,4,31,AT,2,2,1
 * ERR001876,4,31,CA,1,0,40
 * </pre>
 * </p>
 *
 * <h2>Examples</h2>
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/validation/ValidationAmplicons.java
@ -61,7 +61,7 @@ import java.util.List;
 * CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
 *</pre>
 * are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
- *
+ *<pre>
 * Valid                     // amplicon is valid
 * SITE_IS_FILTERED=1        // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
 * VARIANT_TOO_NEAR_PROBE=1  // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
@ -72,10 +72,10 @@ import java.util.List;
 * END_TOO_CLOSE,            // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
 * NO_VARIANTS_FOUND,        // no variants found within the amplicon region
 * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or postceding, or a deletion that spans the site itself)
- * </p>
+ * </pre></p>
 *
 * <h2>Examples</h2>
- * <pre></pre>
+ * <pre>
 *    java
 *      -jar GenomeAnalysisTK.jar
 *      -T ValidationAmplicons
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@ -55,7 +55,23 @@ import java.util.*;
 *
 * <h2>Output</h2>
 * <p>
- * Evaluation tables.
+ * Evaluation tables detailing the results of the eval modules which were applied.
 * For example:
 * <pre>
 * output.eval.gatkreport:
 * ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample
 * CountVariants  CompRod   CpG      EvalRod  JexlExpression  Novelty  nProcessedLoci  nCalledLoci  nRefLoci  nVariantLoci  variantRate ...
 * CountVariants  dbsnp     CpG      eval     none            all      65900028        135770       0         135770        0.00206024  ...
 * CountVariants  dbsnp     CpG      eval     none            known    65900028        47068        0         47068         0.00071423  ...
 * CountVariants  dbsnp     CpG      eval     none            novel    65900028        88702        0         88702         0.00134601  ...
 * CountVariants  dbsnp     all      eval     none            all      65900028        330818       0         330818        0.00502000  ...
 * CountVariants  dbsnp     all      eval     none            known    65900028        120685       0         120685        0.00183133  ...
 * CountVariants  dbsnp     all      eval     none            novel    65900028        210133       0         210133        0.00318866  ...
 * CountVariants  dbsnp     non_CpG  eval     none            all      65900028        195048       0         195048        0.00295976  ...
 * CountVariants  dbsnp     non_CpG  eval     none            known    65900028        73617        0         73617         0.00111710  ...
 * CountVariants  dbsnp     non_CpG  eval     none            novel    65900028        121431       0         121431        0.00184265  ...
 * ...
 * </pre>
 * </p>
 *
 * <h2>Examples</h2>
@ -149,12 +165,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
    @Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
    protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
    @Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
    private String TRANCHE_FILENAME = null;
    @Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
    private File ancestralAlignmentsFile = null;
    @Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
    private boolean requireStrictAlleleMatch = false;
    // Variables
    private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
@ -226,16 +242,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
        }
        sampleNamesForStratification.add(ALL_SAMPLE_NAME);
        // Add select expressions for anything in the tranches file
        if ( TRANCHE_FILENAME != null ) {
            // we are going to build a few select names automatically from the tranches file
            for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
                logger.info("Adding select for all variant above the pCut of : " + t);
                SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
                SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
            }
        }
        // Initialize select expressions
        for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
            SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
@ -245,18 +251,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
        // Initialize the set of stratifications and evaluations to use
        stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
        Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
        boolean usingJEXL = false;
        for ( VariantStratifier vs : getStratificationObjects() ) {
            if ( vs.getClass().getSimpleName().equals("Filter") )
                byFilterIsEnabled = true;
            else if ( vs.getClass().getSimpleName().equals("Sample") )
                perSampleIsEnabled = true;
            usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class);
        }
        if ( TRANCHE_FILENAME != null && ! usingJEXL )
            throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled");
        // Initialize the evaluation contexts
        evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
@ -378,16 +379,16 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
        if ( matchingComps.size() == 0 )
            return null;
-        // find the comp which matches the alternate allele from eval
+        // find the comp which matches both the reference allele and alternate allele from eval
        Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
        for ( VariantContext comp : matchingComps ) {
            Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
-            if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp)) )
+            if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
                return comp;
        }
-        // if none match, just return the first one
+        // if none match, just return the first one unless we require a strict match
-        return matchingComps.get(0);
+        return (requireStrictAlleleMatch ? null : matchingComps.get(0));
    }
    public Integer treeReduce(Integer lhs, Integer rhs) { return null; }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CompOverlap.java
@ -22,9 +22,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
    @DataPoint(description = "number of eval SNP sites")
    long nEvalVariants = 0;
    @DataPoint(description = "number of comp SNP sites")
    long nCompVariants = 0;
    @DataPoint(description = "number of eval sites outside of comp sites")
    long novelSites = 0;
@ -75,10 +72,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
    }
    public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        boolean evalIsGood = eval != null && eval.isVariant();
+        boolean evalIsGood = eval != null && eval.isPolymorphic();
-        boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
+        boolean compIsGood = comp != null && comp.isNotFiltered();
        if (compIsGood) nCompVariants++;           // count the number of comp events
        if (evalIsGood) nEvalVariants++;           // count the number of eval events
        if (compIsGood && evalIsGood) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/CountVariants.java
@ -100,21 +100,22 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
        // So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
        // added in a proxy check for monomorphic status here.
        // Protect against case when vc only has no-calls too - can happen if we stratify by sample and sample has a single no-call.
-       if ( !vc1.isVariant() || (vc1.hasGenotypes() &&  vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
+       if ( vc1.isMonomorphic() ) {
            nRefLoci++;
        } else {
             switch (vc1.getType()) {
                case NO_VARIATION:
                    // shouldn't get here
                    break;
                case SNP:
                    nVariantLoci++;
                    nSNPs++;
-                    if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
+                    if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
                    break;
                case MNP:
                    nVariantLoci++;
                    nMNPs++;
-                    if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
+                    if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
                    break;
                case INDEL:
                    nVariantLoci++;
@ -136,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
        String refStr = vc1.getReference().getBaseString().toUpperCase();
-        String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
+        String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
 //        if (aaStr.equals(".")) {
 //            aaStr = refStr;
 //        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/GenotypePhasingEvaluator.java
@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
    }
    public static Double getPQ(Genotype gt) {
-        return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
+        Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
        return d == -1 ? null : d;
    }
    public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelLengthHistogram.java
@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
    public int getComparisonOrder() { return 1; } // need only the evals
    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
            //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
            return vc1.toString(); // biallelic sites are output
        }
-        if ( vc1.isIndel() ) {
+        if ( vc1.isIndel() && vc1.isPolymorphic() ) {
            if ( ! vc1.isBiallelic() ) {
                //veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
                return vc1.toString(); // biallelic sites are output
            }
            // only count simple insertions/deletions, not complex indels
            if ( vc1.isSimpleInsertion() ) {
                indelHistogram.update(vc1.getAlternateAllele(0).length());
            } else if ( vc1.isSimpleDeletion() ) {
                indelHistogram.update(-vc1.getReference().length());
            } else {
                throw new ReviewedStingException("Indel type that is not insertion or deletion.");
            }
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/IndelStatistics.java
@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if (eval != null ) {
+        if (eval != null && eval.isPolymorphic()) {
            if ( indelStats == null ) {
                indelStats = new IndelStats(eval);
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/SimpleMetricsByAC.java
@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
            if ( eval.hasGenotypes() )
                ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
            else if ( eval.hasAttribute("AC") ) {
-                ac = Integer.valueOf(eval.getAttributeAsString("AC"));
+                ac = eval.getAttributeAsInt("AC", -1);
            }
            if ( ac != -1 ) {
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
                }
            }
-            if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
+            if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
                metrics.incrValue(eval);
            }
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ThetaVariantEvaluator.java
@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
    }
    public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
+        if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
            return null; //no interesting sites
        }
-        if (vc.hasGenotypes()) {
+        //this maps allele to a count
        ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
-            //this maps allele to a count
+        int numHetsHere = 0;
-            ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
+        float numGenosHere = 0;
        int numIndsHere = 0;
-            int numHetsHere = 0;
+        for (Genotype genotype : vc.getGenotypes().values()) {
-            float numGenosHere = 0;
+            numIndsHere++;
-            int numIndsHere = 0;
+            if (!genotype.isNoCall()) {
                //increment stats for heterozygosity
                if (genotype.isHet()) {
                    numHetsHere++;
                }
-            for (Genotype genotype : vc.getGenotypes().values()) {
+                numGenosHere++;
-                numIndsHere++;
+                //increment stats for pairwise mismatches
                if (!genotype.isNoCall()) {
                    //increment stats for heterozygosity
                    if (genotype.isHet()) {
                        numHetsHere++;
                    }
-                    numGenosHere++;
+                for (Allele allele : genotype.getAlleles()) {
-                    //increment stats for pairwise mismatches
+                    if (allele.isNonNull() && allele.isCalled()) {
-
+                        String alleleString = allele.toString();
-                    for (Allele allele : genotype.getAlleles()) {
+                        alleleCounts.putIfAbsent(alleleString, 0);
-                        if (allele.isNonNull() && allele.isCalled()) {
+                        alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
                            String alleleString = allele.toString();
                            alleleCounts.putIfAbsent(alleleString, 0);
                            alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
                        }
                    }
                }
            }
-            if (numGenosHere > 0) {
+        }
-                //only if have one called genotype at least
+        if (numGenosHere > 0) {
-                this.numSites++;
+            //only if have one called genotype at least
            this.numSites++;
-                this.totalHet += numHetsHere / numGenosHere;
+            this.totalHet += numHetsHere / numGenosHere;
-                //compute based on num sites
+            //compute based on num sites
-                float harmonicFactor = 0;
+            float harmonicFactor = 0;
-                for (int i = 1; i <= numIndsHere; i++) {
+            for (int i = 1; i <= numIndsHere; i++) {
-                    harmonicFactor += 1.0 / i;
+                harmonicFactor += 1.0 / i;
-                }
+            }
-                this.thetaRegionNumSites += 1.0 / harmonicFactor;
+            this.thetaRegionNumSites += 1.0 / harmonicFactor;
-                //now compute pairwise mismatches
+            //now compute pairwise mismatches
-                float numPairwise = 0;
+            float numPairwise = 0;
-                float numDiffs = 0;
+            float numDiffs = 0;
-                for (String allele1 : alleleCounts.keySet()) {
+            for (String allele1 : alleleCounts.keySet()) {
-                    int allele1Count = alleleCounts.get(allele1);
+                int allele1Count = alleleCounts.get(allele1);
-                    for (String allele2 : alleleCounts.keySet()) {
+                for (String allele2 : alleleCounts.keySet()) {
-                        if (allele1.compareTo(allele2) < 0) {
+                    if (allele1.compareTo(allele2) < 0) {
-                            continue;
+                        continue;
-                        }
+                    }
-                        if (allele1 .compareTo(allele2) == 0) {
+                    if (allele1 .compareTo(allele2) == 0) {
-                            numPairwise += allele1Count * (allele1Count - 1) * .5;
+                        numPairwise += allele1Count * (allele1Count - 1) * .5;
-                        }
+                    }
-                        else {
+                    else {
-                            int allele2Count = alleleCounts.get(allele2);
+                        int allele2Count = alleleCounts.get(allele2);
-                            numPairwise += allele1Count * allele2Count;
+                        numPairwise += allele1Count * allele2Count;
-                            numDiffs += allele1Count * allele2Count;
+                        numDiffs += allele1Count * allele2Count;
                        }
                    }
                }
            }
-                if (numPairwise > 0) {
+            if (numPairwise > 0) {
-                    this.totalAvgDiffs += numDiffs / numPairwise;
+                this.totalAvgDiffs += numDiffs / numPairwise;
                }
            }
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/TiTvVariantEvaluator.java
@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
    }
    public void updateTiTv(VariantContext vc, boolean updateStandard) {
-        if (vc != null && vc.isSNP() && vc.isBiallelic()) {
+        if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
            if (VariantContextUtils.isTransition(vc)) {
                if (updateStandard) nTiInComp++;
                else nTi++;
@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
                else nTv++;
            }
-            String refStr = vc.getReference().getBaseString().toUpperCase();
+            if (vc.hasAttribute("ANCESTRALALLELE")) {
-            String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
+                final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
-
+                if ( ! aaStr.equals(".") ) {
-            if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
+                    switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
-                BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
+                        case TRANSITION: nTiDerived++; break;
-
+                        case TRANSVERSION: nTvDerived++; break;
-                //System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
+                        default: break;
-
+                    }
                if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
                    nTiDerived++;
                } else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
                    nTvDerived++;
                }
            }
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/ValidationReport.java
@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
    public SiteStatus calcSiteStatus(VariantContext vc) {
        if ( vc == null ) return SiteStatus.NO_CALL;
        if ( vc.isFiltered() ) return SiteStatus.FILTERED;
-        if ( ! vc.isVariant() ) return SiteStatus.MONO;
+        if ( vc.isMonomorphic() ) return SiteStatus.MONO;
        if ( vc.hasGenotypes() ) return SiteStatus.POLY;  // must be polymorphic if isMonomorphic was false and there are genotypes
        if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
            int ac = 0;
@ -130,10 +131,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
 ////                System.out.printf("  ac = %d%n", ac);
            }
            else
-                ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
+                ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
            return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
        } else if ( vc.hasGenotypes() ) {
            return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
        } else {
            return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
            //return SiteStatus.NO_CALL; // we can't figure out what to do
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        final String interesting = null;
-        if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
+        if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
            if( titvStats == null ) { titvStats = new TiTvStats(); }
            titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
        if (eval != null) {
            int AC = -1;
            if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
-                AC = eval.getAttributeAsInt("AC");
+                AC = eval.getAttributeAsInt("AC", 0);
            } else if ( eval.isVariant() ) {
                for (Allele allele : eval.getAlternateAlleles())
                    AC = Math.max(AC, eval.getChromosomeCount(allele));
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleFrequency.java
@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
        if (eval != null) {
            try {
-                relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
+                relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
            } catch (Exception e) {
                return relevantStates;
            }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/Degeneracy.java
@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
            Integer frame = null;
            if (eval.hasAttribute("refseq.functionalClass")) {
-                aa = eval.getAttributeAsString("refseq.variantAA");
+                aa = eval.getAttributeAsString("refseq.variantAA", null);
-                frame = eval.getAttributeAsInt("refseq.frame");
+                frame = eval.getAttributeAsInt("refseq.frame", 0);
            } else if (eval.hasAttribute("refseq.functionalClass_1")) {
                int annotationId = 1;
                String key;
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
                do {
                    key = String.format("refseq.functionalClass_%d", annotationId);
-                    String newtype = eval.getAttributeAsString(key);
+                    String newtype = eval.getAttributeAsString(key, null);
                    if ( newtype != null &&
                            ( type == null ||
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
                        type = newtype;
                        String aakey = String.format("refseq.variantAA_%d", annotationId);
-                        aa = eval.getAttributeAsString(aakey);
+                        aa = eval.getAttributeAsString(aakey, null);
                        if (aa != null) {
                            String framekey = String.format("refseq.frame_%d", annotationId);
                            if (eval.hasAttribute(framekey)) {
-                                frame = eval.getAttributeAsInt(framekey);
+                                frame = eval.getAttributeAsInt(framekey, 0);
                            }
                        }
                    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import java.util.ArrayList;
@ -11,25 +12,34 @@ import java.util.List;
 * Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
 */
 public class FunctionalClass extends VariantStratifier {
-    @Override
+
-    public void initialize() {
+    public enum FunctionalType {
-        states.add("all");
+        silent,
-        states.add("silent");
+        missense,
-        states.add("missense");
+        nonsense
        states.add("nonsense");
    }
-    public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
+    @Override
    public void initialize() {
        states.add("all");
        for ( FunctionalType type : FunctionalType.values() )
            states.add(type.name());
    }
 public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
        ArrayList<String> relevantStates = new ArrayList<String>();
        relevantStates.add("all");
        if (eval != null && eval.isVariant()) {
-            String type = null;
+            FunctionalType type = null;
            if (eval.hasAttribute("refseq.functionalClass")) {
-                type = eval.getAttributeAsString("refseq.functionalClass");
+                try {
                    type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
                } catch ( Exception e ) {} // don't error out if the type isn't supported
            } else if (eval.hasAttribute("refseq.functionalClass_1")) {
                int annotationId = 1;
                String key;
@ -37,24 +47,36 @@ public class FunctionalClass extends VariantStratifier {
                do {
                    key = String.format("refseq.functionalClass_%d", annotationId);
-                    String newtype = eval.getAttributeAsString(key);
+                    String newtypeStr = eval.getAttributeAsString(key, null);
-
+                    if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
-                    if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
+                        try {
-                         ( type == null ||
+                            FunctionalType newType = FunctionalType.valueOf(newtypeStr);
-                         ( type.equals("silent") && !newtype.equals("silent") ) ||
+                            if ( type == null ||
-                         ( type.equals("missense") && newtype.equals("nonsense") ) )
+                                ( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
-                       ) {
+                                ( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
-                        type = newtype;
+                                type = newType;
                            }
                        } catch ( Exception e ) {} // don't error out if the type isn't supported
                    }
                    annotationId++;
                } while (eval.hasAttribute(key));
            } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) {
                try {
                    SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString());
                    if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE )
                        type = FunctionalType.nonsense;
                    else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE )
                        type = FunctionalType.missense;
                    else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT )
                        type = FunctionalType.silent;
                    }
                catch ( Exception e ) {} // don't error out if the type isn't supported
            }
-            if (type != null) {
+            if ( type != null ) {
-                if      (type.equals("silent"))   { relevantStates.add("silent");   }
+                relevantStates.add(type.name());
                else if (type.equals("missense")) { relevantStates.add("missense"); }
                else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
            }
        }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/util/VariantEvalUtils.java
@ -277,7 +277,7 @@ public class VariantEvalUtils {
     * @return a new VariantContext with just the requested samples
     */
    public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
-        VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
+        VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
        HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
@ -354,7 +354,7 @@ public class VariantEvalUtils {
    /**
     * Registers {@code vc} under {@code sample} in {@code mappings}, creating the
     * per-sample set the first time the sample is seen.
     *
     * The changed line of this hunk switches the newly created set from HashSet to
     * LinkedHashSet, so the contexts stored for a sample iterate in the order they
     * were added.
     *
     * @param mappings sample name to set of variant contexts; modified in place
     * @param sample   key under which the context is stored
     * @param vc       variant context to add to the sample's set
     */
    private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
        // Lazily create the bucket for a sample that has no entry yet.
        if ( !mappings.containsKey(sample) )
-            mappings.put(sample, new HashSet<VariantContext>());
+            mappings.put(sample, new LinkedHashSet<VariantContext>());
        mappings.get(sample).add(vc);
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/TrainingSet.java
@ -0,0 +1,76 @@
 /*
 * Copyright (c) 2011 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 /**
  * Holds the role flags and prior parsed from the tags attached to a single
  * {@link RodBinding}.
  *
  * Recognized tags are {@code known}, {@code training}, {@code bad},
  * {@code truth} and {@code consensus}, each of which is enabled only by the exact
  * value {@code "true"}, plus {@code prior}, which is parsed as a double. Tags that
  * are absent leave the corresponding field at its default.
  *
  * User: rpoplin
  * Date: 3/12/11
  */
 public class TrainingSet {

    /** The binding this training set was built from. */
    public RodBinding<VariantContext> rodBinding;
    /** Set by the tag {@code known=true}. */
    public boolean isKnown = false;
    /** Set by the tag {@code training=true}. */
    public boolean isTraining = false;
    /** Set by the tag {@code bad=true}. */
    public boolean isAntiTraining = false;
    /** Set by the tag {@code truth=true}. */
    public boolean isTruth = false;
    /** Set by the tag {@code consensus=true}. */
    public boolean isConsensus = false;
    /** Value of the {@code prior} tag; 0.0 when absent. Logged with a "Q" prefix. */
    public double prior = 0.0;

    protected final static Logger logger = Logger.getLogger(TrainingSet.class);

    /**
     * Builds a training set from a binding, reading its tags and logging what was found.
     *
     * @param rodBinding the binding whose tags and name are inspected
     * @throws NumberFormatException if a {@code prior} tag is present but is not a valid double
     */
    public TrainingSet( final RodBinding<VariantContext> rodBinding) {
        this.rodBinding = rodBinding;

        final Tags tags = rodBinding.getTags();
        final String name = rodBinding.getName();

        // Parse the tags to decide which tracks have which properties
        if( tags != null ) {
            isKnown = isTagTrue(tags, "known");
            isTraining = isTagTrue(tags, "training");
            isAntiTraining = isTagTrue(tags, "bad");
            isTruth = isTagTrue(tags, "truth");
            isConsensus = isTagTrue(tags, "consensus");
            if( tags.containsKey("prior") ) {
                prior = Double.parseDouble(tags.getValue("prior"));
            }
        }

        // Report back to the user which tracks were found and the properties that were detected.
        // Consensus takes precedence over bad-sites when both flags are set.
        if( isConsensus ) {
            logger.info( String.format( "Found consensus track: %s", name) );
        } else if( isAntiTraining ) {
            logger.info( String.format( "Found bad sites training track: %s", name) );
        } else {
            logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", name, isKnown, isTraining, isTruth, prior) );
        }
    }

    /**
     * Tells whether a tag is present with the exact (case-sensitive) value "true".
     * The literal is the receiver of equals() so a null tag value yields false
     * instead of a NullPointerException.
     */
    private static boolean isTagTrue( final Tags tags, final String key ) {
        return tags.containsKey(key) && "true".equals(tags.getValue(key));
    }
 }
--- a/Show More
+++ b/Show More