Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Matt Hanna 2011-09-27 11:07:01 -04:00
commit e5ce5e265a
203 changed files with 5112 additions and 4149 deletions

299
build.xml
View File

@ -163,6 +163,14 @@
<!-- Remove old versions of ivy jars AFTER the ivy:retrieve has been class loaded. -->
<delete file="${ivy.jar.dir}/ivy-2.0.0.jar"/>
<delete file="${ivy.jar.dir}/ivy-2.2.0-rc1.jar"/>
<!--
An old version of the ivy-1.4.1.xml does not contain /ivy-module/configuration/conf/@name="compile".
Easier to upgrade to 1.4.4 than try to deal with xmlproperty and conditional deletion in ant.
Just in case we remove explicit 1.4.4 and go back to 1.4.1, try to clean out the file for now.
-->
<delete file="${ivy.home}/cache/javax.mail/mail/ivy-1.4.1.xml"/>
<delete file="${ivy.home}/cache/javax.mail/mail/ivydata-1.4.1.properties"/>
<delete file="${ivy.home}/cache/javax.mail/mail/jars/mail-1.4.1.jar"/>
</target>
<target name="init.buildall">
@ -709,53 +717,6 @@
</antcall>
</target>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.compile" depends="init.buildall,dist,test.init.compile">
<echo message="Sting: Compiling test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.scala.compile" depends="test.java.compile,scala.compile" if="scala.include">
<echo message="Scala: Compiling test cases!"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<src path="${scala.private.test.sources}" />
<include name="**/*.scala"/>
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.test.classes}"/>
<pathelement location="${java.test.classes}"/>
<pathelement location="${lib.dir}/testng-5.14.1.jar"/>
</classpath>
</scalac>
</target>
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
<!-- new scala target -->
<target name="scala" description="build the scala directory">
@ -769,20 +730,113 @@
<!-- ***************************************************************************** -->
<!-- where to put reports and tests-->
<property name="report" value="${build.dir}/report"/>
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="test.output" value="${dist.dir}/test"/>
<property name="java.public.test.sources" value="public/java/test"/>
<property name="java.private.test.sources" value="private/java/test"/>
<property name="java.test.classes" value="${build.dir}/java/testclasses"/>
<property name="java.public.test.classes" value="${java.test.classes}/public"/>
<property name="java.private.test.classes" value="${java.test.classes}/private"/>
<property name="java.public.test.sources" value="${public.dir}/java/test"/>
<property name="java.private.test.sources" value="${private.dir}/java/test"/>
<property name="scala.test.classes" value="${build.dir}/scala/testclasses"/>
<property name="scala.public.test.sources" value="public/scala/test"/>
<property name="scala.private.test.sources" value="private/scala/test"/>
<property name="scala.public.test.classes" value="${scala.test.classes}/public"/>
<property name="scala.private.test.classes" value="${scala.test.classes}/private"/>
<property name="scala.public.test.sources" value="${public.dir}/scala/test"/>
<property name="scala.private.test.sources" value="${private.dir}/scala/test"/>
<property name="testng.jar" value="${lib.dir}/testng-5.14.1.jar"/>
<!-- provide a ceiling on the memory that unit/integration tests can consume. -->
<property name="test.maxmemory" value="4g"/>
<target name="test.init.compile">
<mkdir dir="${java.test.classes}"/>
<mkdir dir="${scala.test.classes}"/>
<antcall target="resolve">
<param name="ivy.conf" value="test"/>
</antcall>
</target>
<target name="test.java.public.compile" depends="dist,test.init.compile">
<mkdir dir="${java.public.test.classes}"/>
<echo message="Sting: Compiling public test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.public.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.public.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.private.compile" depends="dist,test.init.compile,test.java.public.compile" if="include.private">
<mkdir dir="${java.private.test.classes}"/>
<echo message="Sting: Compiling private test cases!"/>
<javac fork="true" memoryMaximumSize="512m" destdir="${java.private.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
<src path="${java.private.test.sources}"/>
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.classes}"/>
<pathelement location="${java.contracts}"/>
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
<target name="test.java.compile" depends="test.java.public.compile, test.java.private.compile"/>
<target name="test.scala.public.compile" depends="test.java.compile,scala.compile" if="scala.include">
<mkdir dir="${scala.public.test.classes}"/>
<echo message="Scala: Compiling public test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.public.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.public.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.private.compile" depends="test.java.compile,scala.compile,test.scala.public.compile" if="include.scala.private">
<mkdir dir="${scala.private.test.classes}"/>
<echo message="Scala: Compiling private test cases!"/>
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.private.test.classes}" deprecation="yes" unchecked="yes">
<src path="${scala.private.test.sources}" />
<classpath>
<path refid="scala.dependencies"/>
<pathelement location="${scala.public.test.classes}"/>
<pathelement location="${java.public.test.classes}"/>
<pathelement location="${java.private.test.classes}"/>
<pathelement location="${testng.jar}"/>
</classpath>
</scalac>
</target>
<target name="test.scala.compile" depends="test.scala.public.compile,test.scala.private.compile"/>
<target name="test.compile" depends="init.usecontracts,test.java.compile,test.scala.compile" />
<!-- TEST -->
<macrodef name="run-test">
<attribute name="testtype"/>
<attribute name="outputdir"/>
<attribute name="runfailed"/>
<sequential>
<condition property="run.failed.tests">
<equals arg1="@{runfailed}" arg2="true"/>
</condition>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
@ -792,10 +846,10 @@
<isset property="include.contracts" />
</condition>
<mkdir dir="${report}/@{testtype}"/>
<mkdir dir="@{outputdir}"/>
<echo message="Sting: Running @{testtype} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
<testng outputDir="${report}/@{testtype}"
<taskdef resource="testngtasks" classpath="${testng.jar}"/>
<testng outputDir="@{outputdir}"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
@ -813,117 +867,108 @@
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
<pathelement location="${java.public.test.classes}" />
<pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" />
</classpath>
<classfileset dir="${java.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${scala.test.classes}" includes="**/@{testtype}*.class" />
<classfileset dir="${java.public.test.classes}" includes="**/@{testtype}.class"/>
<classfileset dir="${java.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}.class" if="include.private"/>
</classfileset>
<classfileset dir="${scala.public.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="scala.include"/>
</classfileset>
<classfileset dir="${scala.private.test.classes}" erroronmissingdir="false">
<include name="**/@{testtype}*.class" if="include.scala.private"/>
</classfileset>
<xmlfileset dir="${basedir}">
<include name="@{testtype}" if="run.failed.tests"/>
</xmlfileset>
</testng>
<!-- generate a report for Bamboo or Hudson to read in -->
<junitreport todir="${report}/@{testtype}">
<fileset dir="${report}/@{testtype}">
<junitreport todir="@{outputdir}">
<fileset dir="@{outputdir}">
<include name="*/*.xml"/>
</fileset>
<report format="noframes" todir="${report}/@{testtype}"/>
<report format="noframes" todir="@{outputdir}"/>
</junitreport>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<!-- FAILED-TEST -->
<macrodef name="run-failed-test">
<attribute name="xmlfailedtestfile" />
<sequential>
<!-- Get the pipeline run type. Default to dry. -->
<condition property="pipeline.run" value="dry" else="${pipeline.run}">
<equals arg1="${pipeline.run}" arg2="$${pipeline.run}" />
</condition>
<condition property="cofoja.jvm.args" value="-javaagent:${cofoja.jar} -Dcom.google.java.contract.log.contract=false" else="">
<isset property="include.contracts" />
</condition>
<mkdir dir="${report}/failed_rerun" />
<echo message="Sting: Running @{xmlfailedtestfile} test cases!"/>
<taskdef resource="testngtasks" classpath="${lib.dir}/testng-5.14.1.jar"/>
<testng outputDir="${report}/failed_rerun"
haltOnFailure="false" failureProperty="test.failure"
verbose="2"
workingDir="${basedir}"
useDefaultListeners="false"
listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter">
<jvmarg value="-Xmx${test.maxmemory}" />
<jvmarg value="-Djava.awt.headless=true" />
<jvmarg value="-Dpipeline.run=${pipeline.run}" />
<jvmarg value="-Djava.io.tmpdir=${java.io.tmpdir}" />
<jvmarg line="${cofoja.jvm.args}"/>
<!-- <jvmarg value="-Xdebug"/> -->
<!-- <jvmarg value="-Xrunjdwp:transport=dt_socket,server=y,suspend=y,address=5005"/> -->
<classpath>
<path refid="external.dependencies" />
<pathelement location="${java.classes}" />
<pathelement location="${scala.classes}" />
<pathelement location="${java.contracts}" />
<pathelement location="${java.test.classes}" />
<pathelement location="${scala.test.classes}" />
</classpath>
<xmlfileset dir="${basedir}" includes="@{xmlfailedtestfile}" />
</testng>
<fail message="test failed" if="test.failure" />
</sequential>
</macrodef>
<!-- our three different test conditions: Test, IntegrationTest, PerformanceTest -->
<target name="test" depends="test.compile" description="Run unit tests">
<target name="alltests">
<antcall target="test" inheritAll="false"/>
<antcall target="integrationtest" inheritAll="false"/>
<antcall target="pipelinetest" inheritAll="false"/>
</target>
<target name="alltests.public">
<antcall target="test.public" inheritAll="false"/>
<antcall target="integrationtest.public" inheritAll="false"/>
<antcall target="pipelinetest.public" inheritAll="false"/>
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ttype}"/>
<run-test testtype="${ttype}" outputdir="${report}/${ttype}" runfailed="false"/>
</target>
<target name="integrationtest" depends="test.compile" description="Run integration tests">
<target name="test.public" depends="init.buildpublic,test"/>
<target name="integrationtest" depends="init.buildall,test.compile" description="Run integration tests">
<condition property="itype" value="*IntegrationTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${itype}"/>
<run-test testtype="${itype}" outputdir="${report}/${itype}" runfailed="false"/>
</target>
<target name="performancetest" depends="test.compile" description="Run performance tests">
<target name="integrationtest.public" depends="init.buildpublic,integrationtest"/>
<target name="performancetest" depends="init.buildall,test.compile" description="Run performance tests">
<condition property="ptype" value="*PerformanceTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${ptype}"/>
<run-test testtype="${ptype}" outputdir="${report}/${ptype}" runfailed="false"/>
</target>
<target name="pipelinetest" depends="test.compile" description="Run pipeline tests">
<target name="performancetest.public" depends="init.buildpublic,performancetest" />
<target name="pipelinetest" depends="init.buildall,test.compile" description="Run pipeline tests">
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun" depends="test.compile" description="Run pipeline tests">
<target name="pipelinetest.public" depends="init.buildpublic,pipelinetest" />
<target name="pipelinetestrun" depends="init.buildall,test.compile" description="Run pipeline tests">
<property name="pipeline.run" value="run"/>
<condition property="pipetype" value="*PipelineTest" else="${single}">
<not><isset property="single"/></not>
</condition>
<run-test testtype="${pipetype}"/>
<run-test testtype="${pipetype}" outputdir="${report}/${pipetype}" runfailed="false"/>
</target>
<target name="pipelinetestrun.public" depends="init.buildpublic,pipelinetestrun" />
<target name="failed-test" depends="init.buildall,test.compile">
<run-test testtype="${report}/*UnitTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-test" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*UnitTest/testng-failed.xml" />
<target name="failed-integration" depends="init.buildall,test.compile">
<run-test testtype="${report}/*IntegrationTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-integration" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*IntegrationTest/testng-failed.xml" />
<target name="failed-performance" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PerformanceTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<target name="failed-performance" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*PerformanceTest/testng-failed.xml" />
</target>
<target name="failed-pipeline" depends="test.compile">
<run-failed-test xmlfailedtestfile="${report}/*PipelineTest/testng-failed.xml" />
<target name="failed-pipeline" depends="init.buildall,test.compile">
<run-test testtype="${report}/*PipelineTest/testng-failed.xml" outputdir="${report}/failed_rerun" runfailed="true"/>
</target>
<!-- ******************************************************************************** -->

View File

@ -15,10 +15,8 @@
<!-- Tribble -->
<dependency org="org.broad" name="tribble" rev="latest.integration"/>
<dependency org="log4j" name="log4j" rev="1.2.15">
<!-- Don't include javax.mail here in default, only used in scala->default by commons-email -->
<exclude org="javax.mail" />
</dependency>
<dependency org="log4j" name="log4j" rev="1.2.15"/>
<dependency org="javax.mail" name="mail" rev="1.4.4"/>
<dependency org="colt" name="colt" rev="1.2.0"/>
<dependency org="jboss" name="javassist" rev="3.7.ga"/>
<dependency org="org.simpleframework" name="simple-xml" rev="2.0.4"/>

View File

@ -12,14 +12,14 @@ if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
inputFileName = "~/Desktop/Q-30033@gsa1.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA
}
RUNTIME_UNITS = "(sec)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000
RUNTIME_UNITS = "(hours)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000/60/60
#
# Helper function to aggregate all of the jobs in the report across all tables
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
#
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
#
plotJobsGantt <- function(gatkReport, sortOverall) {
plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) {
title = "All jobs, by analysis, by start time"
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
}
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$relStartTime = (allJobs$startTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
allJobs$relDoneTime = (allJobs$doneTime - minTime) * ORIGINAL_UNITS_TO_SECONDS
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + theme_bw()
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
if ( includeText )
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
p <- p + ylab("Job")
p <- p + ylab("Job number")
p <- p + opts(title=title)
print(p)
}
@ -140,6 +142,8 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) {
convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
g
}
lapply(gatkReportData, convertGroup)
@ -155,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11)
}
plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F)
plotJobsGantt(gatkReportData, T, F)
plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) {
plotGroup(group)

View File

@ -114,7 +114,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/tools/apps/R-2.6.0/bin/Rscript", required = false)
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";

View File

@ -379,7 +379,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
if ( tribbleType == null )
if ( ! file.canRead() | !! file.isFile() ) {
if ( ! file.canRead() | ! file.isFile() ) {
throw new UserException.BadArgumentValue(name, "Couldn't read file to determine type: " + file);
} else {
throw new UserException.CommandLineException(

View File

@ -929,6 +929,14 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
 * Gets the master sequence dictionary for this GATK engine instance.
 * The dictionary is taken from the engine's reference data source, so it reflects
 * the reference the engine was configured with.
 * @return a never-null dictionary listing all of the contigs known to this engine instance
 */
public SAMSequenceDictionary getMasterSequenceDictionary() {
return getReferenceDataSource().getReference().getSequenceDictionary();
}
/**
* Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object.

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -84,7 +85,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
if(currentLocus == GenomeLoc.UNMAPPED) {
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan());
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
currentLocus = null;
continue;
}

View File

@ -215,6 +215,45 @@ public class GATKBAMIndex {
return (new GATKBin(bin).getBinNumber()-levelStart+1)*(BIN_GENOMIC_SPAN /levelSize);
}
/**
 * Used to get close to the unmapped reads at the end of a BAM file.
 * Walks the on-disk BAM index sequentially — for each reference sequence it skips
 * over all bin/chunk data, then records the last linear-index entry, keeping the
 * final one seen across all sequences.
 * @return The file offset of the first record in the last linear bin, or -1
 * if there are no elements in linear bins (i.e. no mapped reads).
 */
public long getStartOfLastLinearBin() {
openIndexFile();
// Seek past the 4-byte header at the start of the index; the sequence count follows it.
seek(4);
final int sequenceCount = readInteger();
// Because no reads may align to the last sequence in the sequence dictionary,
// grab the last element of the linear index for each sequence, and return
// the last one from the last sequence that has one.
long lastLinearIndexPointer = -1;
for (int i = 0; i < sequenceCount; i++) {
// System.out.println("# Sequence TID: " + i);
final int nBins = readInteger();
// System.out.println("# nBins: " + nBins);
for (int j1 = 0; j1 < nBins; j1++) {
// Skip bin #
skipBytes(4);
final int nChunks = readInteger();
// Skip chunks (each chunk is a 16-byte begin/end offset pair)
skipBytes(16 * nChunks);
}
final int nLinearBins = readInteger();
if (nLinearBins > 0) {
// Skip to last element of list of linear bins (8 bytes per entry)
skipBytes(8 * (nLinearBins - 1));
lastLinearIndexPointer = readLongs(1)[0];
}
}
closeIndexFile();
return lastLinearIndexPointer;
}
/**
* Gets the possible number of bins for a given reference sequence.
* @return How many bins could possibly be used according to this indexing scheme to index a single contig.

View File

@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}

View File

@ -134,24 +134,11 @@ public class ReadShardStrategy implements ShardStrategy {
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
for(SAMReaderID id: shardPosition.keySet()) {
// If the region contains location information (in other words, it is not at
// the start of the unmapped region), add the region.
if(currentFilePointer.isRegionUnmapped) {
// If the region is unmapped and no location data exists, add a null as an indicator to
// start at the next unmapped region.
if(!isIntoUnmappedRegion) {
selectedReaders.put(id,null);
isIntoUnmappedRegion = true;
}
else
selectedReaders.put(id,position.get(id));
}
else {
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
if(selectedReaders.size() > 0) {

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.examples;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.ArgumentCollection;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -59,6 +60,7 @@ import org.broadinstitute.sting.gatk.walkers.RodWalker;
* @author Your Name
* @since Date created
*/
@Hidden
public class GATKDocsExample extends RodWalker<Integer, Integer> {
/**
* Put detailed documentation about the argument here. No need to duplicate the summary information

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {

View File

@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
}
else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
}
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed;
}
/**
 * Gets the master sequence dictionary from the engine associated with this stub.
 * @see GenomeAnalysisEngine#getMasterSequenceDictionary()
 * @return the engine's master sequence dictionary
 */
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
}
/**
* Should we tell the VCF writer not to write genotypes?
* @return true if the writer should not write genotypes.

View File

@ -293,15 +293,16 @@ public class GATKRunReport {
* That is, postReport() is guaranteed not to fail for any reason.
*/
private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
try {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
postReportToFile(file);
logger.debug("Wrote report to " + file);
return file;
} catch ( Exception e ) {
// we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e);
file.delete();
return null;
}
}
@ -312,6 +313,7 @@ public class GATKRunReport {
File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file
localFile.deleteOnExit();
try {
// stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
@ -336,14 +338,13 @@ public class GATKRunReport {
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
}
}
}

View File

@ -101,7 +101,7 @@ public class RMDIndexer extends CommandLineProgram {
Index index = IndexFactory.createIndex(inputFileSource, codec, approach);
// add writing of the sequence dictionary, if supplied
builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);
builder.validateAndUpdateIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary());
// create the output stream, and write the index
LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Utilities for working with sequence dictionaries embedded in Tribble indices.
 *
 * Dictionary entries are stored in the index property list as
 * "DICT:&lt;contigName&gt;" -&gt; "&lt;contigLength&gt;" pairs.
 */
public class IndexDictionaryUtils {
    private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);

    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";

    /**
     * Get the sequence dictionary from the index property list, if available.
     *
     * @param index the index to read properties from
     * @return a SAMSequenceDictionary built from the "DICT:" properties; empty (never null)
     *         when the index carries no dictionary entries
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        final SAMSequenceDictionary dict = new SAMSequenceDictionary();
        for (Map.Entry<String, String> entry : index.getProperties().entrySet()) {
            if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate)) {
                // strip the "DICT:" prefix to recover the contig name; the property value is its length
                final String contig = entry.getKey().substring(SequenceDictionaryPropertyPredicate.length());
                dict.addSequence(new SAMSequenceRecord(contig, Integer.valueOf(entry.getValue())));
            }
        }
        return dict;
    }

    /**
     * Create a sequence dictionary from the contig list that is always present in the index;
     * a backup approach when no explicit dictionary is stored. Contig lengths are unknown
     * from the contig list alone, so each sequence is recorded with length 0.
     *
     * @param index the index to read contig names from
     * @param dict  the sequence dictionary to add contigs to
     * @return the filled-in sequence dictionary (the same object passed in)
     */
    static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        final LinkedHashSet<String> seqNames = index.getSequenceNames();
        if (seqNames == null) {
            return dict;
        }
        for (String name : seqNames) {
            dict.addSequence(new SAMSequenceRecord(name, 0));
        }
        return dict;
    }

    /**
     * Store the given sequence dictionary in the index property list, one
     * "DICT:&lt;name&gt;" -&gt; "&lt;length&gt;" property per contig.
     *
     * @param index the index to update
     * @param dict  the sequence dictionary to record
     */
    public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
        for ( SAMSequenceRecord seq : dict.getSequences() ) {
            final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
            final String length = String.valueOf(seq.getSequenceLength());
            index.addProperty(contig, length);
        }
    }

    /**
     * Validate the track's sequence dictionary against the reference's.
     * Tracks without an embedded dictionary (null or empty) are skipped with an info message.
     *
     * @param trackName               name of the track, for log/error messages
     * @param trackDict               the track's dictionary; may be null or empty
     * @param referenceDict           the reference dictionary to validate against
     * @param validationExclusionType exclusions that may relax validation
     */
    public static void validateTrackSequenceDictionary(final String trackName,
                                                       final SAMSequenceDictionary trackDict,
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
        if (trackDict == null || trackDict.size() == 0)
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            // dead code removed: a TreeSet of track sequence names was built here but never used
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
        }
    }
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
*
* @author aaron
*
* @author aaron
*
* Class RMDTrackBuilder
*
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { }
sequenceDictionary = getSequenceDictionaryFromProperties(index);
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
if (sequenceDictionary.size() == 0 && dict != null) {
File indexFile = Tribble.indexFile(inputFile);
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
sequenceDictionary = getSequenceDictionaryFromProperties(index);
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
try { // re-write the index
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
}
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx;
}
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes
// ---------------------------------------------------------------------------------------------------------
/**
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
* @param index the index file to use
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/**
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
* (that each contig in the index is in the sequence dictionary).
* @param inputFile for proper error message formatting.
* @param dict the sequence dictionary
* @param index the index file
* @param indexFile the index file
* @param rewriteIndex should we rewrite the index when we're done?
*
*/
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
if (dict == null) return;
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) {
if (dict.getSequence(seq.getSequenceName()) == null)
continue;
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
}
// re-write the index
if (rewriteIndex) try {
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
// actually update the dictionary in the index
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
}
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
if (trackDict == null || trackDict.size() == 0)
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
public void validateTrackSequenceDictionary(final String trackName,
final SAMSequenceDictionary trackDict,
final SAMSequenceDictionary referenceDict ) {
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
}
}

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* Tracks a linked list of GATKReportColumn in order by name.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
/**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key);
return super.put(key, value);
}
/**
 * Returns an iterator over the columns in the order recorded in columnNames,
 * delegating element lookup to getByIndex.
 *
 * @return a read-only iterator over the columns; remove() always throws
 *         UnsupportedOperationException
 */
@Override
public Iterator<GATKReportColumn> iterator() {
    return new Iterator<GATKReportColumn>() {
        int offset = 0; // index of the next column to return
        public boolean hasNext() { return offset < columnNames.size() ; }
        public GATKReportColumn next() { return getByIndex(offset++); }
        public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
    };
}
}

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
}
}
/**
 * Checks whether a row with the given primary key exists in this table.
 *
 * @param primaryKey the primary key value to look up
 * @return true if the primary key column contains the given key
 */
public boolean containsKey(Object primaryKey) {
    return primaryKeyColumn.contains(primaryKey);
}
/**
* Set the value for a given position in the table
*

View File

@ -358,7 +358,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
public void printOnTraversalDone() {
printProgress(null, null, true);
final double elapsed = timer.getElapsedTime();
final double elapsed = timer == null ? 0 : timer.getElapsedTime();
ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics();

View File

@ -26,21 +26,23 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.VariantContextAdaptors;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import java.io.PrintStream;
import java.util.Iterator;
/**
* Prints out all of the RODs in the input data set. Data is rendered using the toString() method
* of the given ROD.
*/
public class PrintRODsWalker extends RodWalker<Integer, Integer> {
@Input(fullName="input", shortName = "input", doc="The input ROD which should be printed out.", required=true)
public RodBinding<Feature> input;
@Output
PrintStream out;
@ -62,7 +64,7 @@ public class PrintRODsWalker extends RodWalker<Integer, Integer> {
if ( tracker == null )
return 0;
for ( Feature feature : tracker.getValues(Feature.class) ) {
for ( Feature feature : tracker.getValues(Feature.class, context.getLocation()) ) {
out.println(feature.toString());
}

View File

@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
*
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
* </pre>
*
*/

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -77,6 +78,15 @@ public abstract class Walker<MapType, ReduceType> {
return toolkit;
}
/**
 * Gets the master sequence dictionary for this walker.
 *
 * @see GenomeAnalysisEngine#getMasterSequenceDictionary
 * @return the engine's master sequence dictionary
 */
protected SAMSequenceDictionary getMasterSequenceDictionary() {
    return getToolkit().getMasterSequenceDictionary();
}
/**
* (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus.

View File

@ -43,6 +43,9 @@ import java.util.List;
import java.util.Map;
/**
 * The allele balance (fraction of ref bases over ref + alt bases) across all biallelic het-called samples
*/
public class AlleleBalance extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -16,6 +16,9 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
 * The allele balance (fraction of ref bases over ref + alt bases) separately for each biallelic het-called sample
*/
public class AlleleBalanceBySample extends GenotypeAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {

View File

@ -6,8 +6,9 @@ import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.Map;
/**
* Abstract base class for all annotations that are normalized by depth
*/
public abstract class AnnotationByDepth extends InfoFieldAnnotation {

View File

@ -47,6 +47,9 @@ import java.util.List;
import java.util.Map;
/**
* Count of A, C, G, T bases across all samples
*/
public class BaseCounts extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -13,6 +13,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for base qualities (ref bases vs. bases of the alternate allele)
*/
public class BaseQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("BaseQRankSum"); }

View File

@ -44,6 +44,11 @@ import java.util.List;
import java.util.Map;
/**
* Allele count in genotypes, for each ALT allele, in the same order as listed;
* allele Frequency, for each ALT allele, in the same order as listed; total number
* of alleles in called genotypes.
*/
public class ChromosomeCounts extends InfoFieldAnnotation implements StandardAnnotation {
private String[] keyNames = { VCFConstants.ALLELE_NUMBER_KEY, VCFConstants.ALLELE_COUNT_KEY, VCFConstants.ALLELE_FREQUENCY_KEY };

View File

@ -16,7 +16,23 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Total (unfiltered) depth over all samples.
*
* This and AD are complementary fields that are two important ways of thinking about the depth of the data for this sample
 * at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
 * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL.
*
* Note that the DP is affected by downsampling (-dcov) though, so the max value one can obtain for N samples with
* -dcov D is N * D
*/
public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -23,6 +23,25 @@ import java.util.List;
import java.util.Map;
/**
* The depth of coverage of each VCF allele in this sample.
*
* This and DP are complementary fields that are two important ways of thinking about the depth of the data for this sample
 * at this site. The DP field describes the total depth of reads that passed the Unified Genotyper's internal
* quality control metrics (like MAPQ > 17, for example), whatever base was present in the read at this site.
* The AD values (one for each of REF and ALT fields) is the count of all reads that carried with them the
* REF and ALT alleles. The reason for this distinction is that the DP is in some sense reflective of the
* power I have to determine the genotype of the sample at this site, while the AD tells me how many times
* I saw each of the REF and ALT alleles in the reads, free of any bias potentially introduced by filtering
 * the reads. If, for example, I believe there really is an A/T polymorphism at a site, then I would like
* to know the counts of A and T bases in this sample, even for reads with poor mapping quality that would
* normally be excluded from the statistical calculations going into GQ and QUAL. Please note, however, that
* the AD isn't necessarily calculated exactly for indels (it counts as non-reference only those indels that
* are actually present and correctly left-aligned in the alignments themselves). Because of this fact and
* because the AD includes reads and bases that were filtered by the Unified Genotyper, <b>one should not base
* assumptions about the underlying genotype based on it</b>; instead, the genotype likelihoods (PLs) are what
* determine the genotype calls (see below).
*/
public class DepthPerAlleleBySample extends GenotypeAnnotation implements StandardAnnotation {
private static String REF_ALLELE = "REF";

View File

@ -43,6 +43,11 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Phred-scaled p-value using Fisher's Exact Test to detect strand bias (the variation
 * being seen on only the forward or only the reverse strand) in the reads. More bias is
* indicative of false positive calls.
*/
public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotation {
private static final String FS = "FS";
private static final double MIN_PVALUE = 1E-320;

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* The GC content (# GC bases / # all bases) of the reference within 50 bp +/- this site
*/
public class GCContent extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -34,12 +34,12 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
import org.broadinstitute.sting.gatk.walkers.genotyper.IndelGenotypeLikelihoodsCalculationModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.AlignmentUtils;
@ -49,6 +49,10 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Consistency of the site with two (and only two) segregating haplotypes. Higher scores
* are indicative of regions with bad alignments, often leading to artifactual SNP and indel calls.
*/
public class HaplotypeScore extends InfoFieldAnnotation implements StandardAnnotation {
private final static boolean DEBUG = false;
private final static int MIN_CONTEXT_WING_SIZE = 10;

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Phred-scaled P value of genotype-based (using GT field) test for Hardy-Weinberg test for disequilibrium
*/
public class HardyWeinberg extends InfoFieldAnnotation implements WorkInProgressAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -16,7 +16,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Largest contiguous homopolymer run of the variant allele in either direction on the reference.
*/
public class HomopolymerRun extends InfoFieldAnnotation implements StandardAnnotation {
private boolean ANNOTATE_INDELS = true;

View File

@ -17,14 +17,15 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 5/16/11
*/
// A set of annotations calculated directly from the GLs
public class GLstats extends InfoFieldAnnotation implements StandardAnnotation {
/**
* Likelihood-based (using PL field) test for the inbreeding among samples.
*
* A continuous generalization of the Hardy-Weinberg test for disequilibrium that works
* well with limited coverage per sample. See the 1000 Genomes Phase I release for
* more information.
*/
public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnnotation {
private static final int MIN_SAMPLES = 10;

View File

@ -14,11 +14,7 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: Mar 11, 2011
* Time: 11:47:33 AM
* To change this template use File | Settings | File Templates.
* Rough category of indel type (insertion, deletion, multi-allelic, other)
*/
public class IndelType extends InfoFieldAnnotation implements ExperimentalAnnotation {

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
 * Triplet annotation: fraction of MAPQ == 0, MAPQ < 10, and count of all mapped reads
*/
public class LowMQ extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -14,6 +14,9 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
* The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test for mapping qualities (reads with ref bases vs. those with the alternate allele)
*/
public class MappingQualityRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("MQRankSum"); }

View File

@ -19,6 +19,9 @@ import java.util.List;
import java.util.Map;
/**
* Total count across all samples of mapping quality zero reads
*/
public class MappingQualityZero extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,85 +1,81 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Per-sample count of reads in the pileup with mapping quality zero,
 * emitted under the VCFConstants.MAPPING_QUALITY_ZERO_KEY format key.
 */
public class MappingQualityZeroBySample extends GenotypeAnnotation {

    /**
     * Compute the per-sample MQ0 count for the given genotype.
     *
     * @param tracker reference metadata tracker (not used by this annotation)
     * @param walker  the calling walker
     * @param ref     the reference context
     * @param context the per-sample alignment context supplying the pileup
     * @param vc      the variant context; used to choose the indel vs. base pileup
     * @param g       the genotype being annotated; uncalled or missing genotypes are skipped
     * @return a map from the MQ0 key to the formatted count, or null when the genotype
     *         is uncalled or no usable pileup is available
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker,
            AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        int mq0 = 0;
        ReadBackedPileup pileup = null;
        // indels use the extended-event pileup; otherwise fall back to the base pileup
        if (vc.isIndel() && context.hasExtendedEventPileup())
            pileup = context.getExtendedEventPileup();
        else if (context.hasBasePileup())
            pileup = context.getBasePileup();
        else return null;

        if (pileup != null) {
            for (PileupElement p : pileup ) {
                if ( p.getMappingQual() == 0 )
                    mq0++;
            }
        }
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), String.format("%d", mq0));
        return map;
    }

    public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }

    public List<VCFFormatHeaderLine> getDescriptions() { return Arrays.asList(
            new VCFFormatHeaderLine(getKeyNames().get(0), 1,
                    VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample")); }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Count for each sample of mapping quality zero reads
*/
public class MappingQualityZeroBySample extends GenotypeAnnotation {

    /**
     * Counts the reads with mapping quality zero in this sample's pileup.
     *
     * @return a single-entry map { MQ0 key -> count }, or null when the genotype
     *         is missing/uncalled or no usable pileup is available
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker,
                                        AnnotatorCompatibleWalker walker, ReferenceContext ref, AlignmentContext context, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        // Indels are counted from the extended-event pileup; everything else
        // from the regular base pileup.
        final ReadBackedPileup pileup;
        if ( vc.isIndel() && context.hasExtendedEventPileup() )
            pileup = context.getExtendedEventPileup();
        else if ( context.hasBasePileup() )
            pileup = context.getBasePileup();
        else
            return null;

        int mq0 = 0;
        for ( PileupElement p : pileup ) {
            if ( p.getMappingQual() == 0 )
                mq0++;
        }

        Map<String, Object> map = new HashMap<String, Object>();
        // BUGFIX (type consistency): report the count as an Integer. The FORMAT
        // header line declared in getDescriptions() has Type=Integer, but the
        // previous code stored String.format("%d", mq0), a String value.
        map.put(getKeyNames().get(0), mq0);
        return map;
    }

    public List<String> getKeyNames() { return Arrays.asList(VCFConstants.MAPPING_QUALITY_ZERO_KEY); }

    public List<VCFFormatHeaderLine> getDescriptions() {
        return Arrays.asList(
            new VCFFormatHeaderLine(getKeyNames().get(0), 1,
                VCFHeaderLineType.Integer, "Number of Mapping Quality Zero Reads per sample"));
    }
}

View File

@ -17,8 +17,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Fraction of all reads across samples that have mapping quality zero
*/
public class MappingQualityZeroFraction extends InfoFieldAnnotation implements ExperimentalAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -17,11 +17,8 @@ import java.util.List;
import java.util.Map;
/**
 * The number of N bases, counting only SOLiD data.
 *
 * @author rpoplin
 */
public class NBaseCount extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
if( stratifiedContexts.size() == 0 )

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -15,7 +16,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Variant confidence (given as (AB+BB)/AA from the PLs) / unfiltered depth.
*
* Low scores are indicative of false positive calls and artifacts.
*/
public class QualByDepth extends AnnotationByDepth implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,6 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Root Mean Square of the mapping quality of the reads across all samples.
*/
public class RMSMappingQuality extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -21,7 +21,9 @@ import java.util.List;
import java.util.Map;
/**
* Abstract root for all RankSum based annotations
*/
public abstract class RankSumTest extends InfoFieldAnnotation implements StandardAnnotation {
static final double INDEL_LIKELIHOOD_THRESH = 0.1;
static final boolean DEBUG = false;

View File

@ -1,209 +1,207 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Per-sample read depth (DP, including MQ0 reads) and allelic fraction (FA,
 * computed over non-MQ0 reads) for SNP and indel calls.
 *
 * @author asivache
 */
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {

    // NOTE(review): this constant is never referenced in the visible class body.
    private static String REF_ALLELE = "REF";

    private static String DEL = "DEL"; // constant, for speed: no need to create a key string for deletion allele every time

    /**
     * Computes the per-sample DP (total read depth, including MQ0 reads) and FA
     * (fraction of non-MQ0 reads supporting each alternate allele) annotations.
     *
     * @return annotation map (DP always; FA only when computable), or null for
     *         uncalled/missing genotypes and variant types other than SNP/indel
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
                                        AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        if ( vc.isSNP() )
            return annotateSNP(stratifiedContext, vc);
        if ( vc.isIndel() )
            return annotateIndel(stratifiedContext, vc);

        return null;
    }

    // Computes DP/FA from the base pileup at a SNP site.
    private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {

        if ( ! stratifiedContext.hasBasePileup() ) return null;

        // Keyed by the first (only) base of each alternate allele.
        HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
        for ( Allele allele : vc.getAlternateAlleles() )
            alleleCounts.put(allele.getBases()[0], 0);

        ReadBackedPileup pileup = stratifiedContext.getBasePileup();
        int totalDepth = pileup.size();

        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map; // done, can not compute FA at 0 coverage!!

        int mq0 = 0; // number of reads that are actually mq0
        for ( PileupElement p : pileup ) {
            if ( p.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }
            if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
                alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
        }

        if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do

        // we need to add counts in the correct order (alternate-allele order)
        String[] fracs = new String[alleleCounts.size()];
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
            fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
        }

        map.put(getKeyNames().get(1), fracs);
        return map;
    }

    // Computes DP/FA from the extended-event pileup at an indel site.
    private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
        if ( ! stratifiedContext.hasExtendedEventPileup() ) {
            return null;
        }

        ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
        if ( pileup == null )
            return null;

        int totalDepth = pileup.size();
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map;

        int mq0 = 0; // number of reads that are actually mq0
        HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
        Allele refAllele = vc.getReference();

        for ( Allele allele : vc.getAlternateAlleles() ) {
            if ( allele.isNoCall() ) {
                continue; // this does not look so good, should we die???
            }
            alleleCounts.put(getAlleleRepresentation(allele), 0);
        }

        for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
            if ( e.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }
            if ( e.isInsertion() ) {
                final String b = e.getEventBases();
                if ( alleleCounts.containsKey(b) ) {
                    alleleCounts.put(b, alleleCounts.get(b)+1);
                }
            } else {
                if ( e.isDeletion() ) {
                    if ( e.getEventLength() == refAllele.length() ) {
                        // this is indeed the deletion allele recorded in VC
                        final String b = DEL;
                        if ( alleleCounts.containsKey(b) ) {
                            alleleCounts.put(b, alleleCounts.get(b)+1);
                        }
                    }
                    // else {
                    //    System.out.print(" deletion of WRONG length found");
                    // }
                }
            }
        }

        if ( mq0 == totalDepth ) return map;

        // NOTE(review): if any alternate allele was a no-call, it was skipped
        // above, so fracs (sized by alleleCounts.size()) is shorter than the
        // alt-allele list and alleleCounts.get(...) below returns null —
        // potential AIOOBE/NPE. Confirm whether no-call alts can reach here.
        String[] fracs = new String[alleleCounts.size()];
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++)
            fracs[i] = String.format("%.3f",
                    ((float)alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i))))/(totalDepth-mq0));

        map.put(getKeyNames().get(1), fracs);
        //map.put(getKeyNames().get(0), counts);
        return map;
    }

    // Maps an allele to its key in the count map: deletions (null wrt the ref)
    // share the DEL key; insertions are keyed by their inserted bases.
    private String getAlleleRepresentation(Allele allele) {
        if ( allele.isNull() ) { // deletion wrt the ref
            return DEL;
        } else { // insertion, pass actual bases
            return allele.getBaseString();
        }
    }

    //  public String getIndelBases()

    public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }

    public List<VCFFormatHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
                                                     1,
                                                     VCFHeaderLineType.Integer,
                                                     "Total read depth per sample, including MQ0"),
                             new VCFFormatHeaderLine(getKeyNames().get(1),
                                                     VCFHeaderLineCount.UNBOUNDED,
                                                     VCFHeaderLineType.Float,
                                                     "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
    }
}
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.utils.codecs.vcf.VCFFormatHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Unsupported.
 *
 * Per-sample read depth (DP, including MQ0 reads) and allelic fraction (FA,
 * computed over non-MQ0 reads) for SNP and indel calls.
 */
@Hidden
public class ReadDepthAndAllelicFractionBySample extends GenotypeAnnotation {

    // Constant, for speed: no need to create a key string for the deletion allele every time.
    // (The unused REF_ALLELE constant from the previous revision has been removed.)
    private static String DEL = "DEL";

    /**
     * Entry point: dispatches to the SNP or indel implementation.
     *
     * @return annotation map (DP always; FA only when computable), or null for
     *         uncalled/missing genotypes and variant types other than SNP/indel
     */
    public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref,
                                        AlignmentContext stratifiedContext, VariantContext vc, Genotype g) {
        if ( g == null || !g.isCalled() )
            return null;

        if ( vc.isSNP() )
            return annotateSNP(stratifiedContext, vc);
        if ( vc.isIndel() )
            return annotateIndel(stratifiedContext, vc);

        return null;
    }

    // Computes DP/FA from the base pileup at a SNP site.
    private Map<String,Object> annotateSNP(AlignmentContext stratifiedContext, VariantContext vc) {

        if ( ! stratifiedContext.hasBasePileup() ) return null;

        // Keyed by the first (only) base of each alternate allele.
        HashMap<Byte, Integer> alleleCounts = new HashMap<Byte, Integer>();
        for ( Allele allele : vc.getAlternateAlleles() )
            alleleCounts.put(allele.getBases()[0], 0);

        ReadBackedPileup pileup = stratifiedContext.getBasePileup();
        int totalDepth = pileup.size();

        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map; // done, cannot compute FA at 0 coverage!!

        int mq0 = 0; // number of reads that are actually mq0
        for ( PileupElement p : pileup ) {
            if ( p.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }
            if ( alleleCounts.containsKey(p.getBase()) ) // non-mq0 read and it's an alt
                alleleCounts.put(p.getBase(), alleleCounts.get(p.getBase())+1);
        }

        if ( mq0 == totalDepth ) return map; // if all reads are mq0, there is nothing left to do

        // We need to add the fractions in alternate-allele order. Size by the
        // alt-allele count so the array always lines up with the loop below.
        String[] fracs = new String[vc.getAlternateAlleles().size()];
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
            fracs[i] = String.format("%.3f", ((float)alleleCounts.get(vc.getAlternateAllele(i).getBases()[0]))/(totalDepth-mq0));
        }

        map.put(getKeyNames().get(1), fracs);
        return map;
    }

    // Computes DP/FA from the extended-event pileup at an indel site.
    private Map<String,Object> annotateIndel(AlignmentContext stratifiedContext, VariantContext vc) {
        if ( ! stratifiedContext.hasExtendedEventPileup() ) {
            return null;
        }

        ReadBackedExtendedEventPileup pileup = stratifiedContext.getExtendedEventPileup();
        if ( pileup == null )
            return null;

        int totalDepth = pileup.size();
        Map<String, Object> map = new HashMap<String, Object>();
        map.put(getKeyNames().get(0), totalDepth); // put total depth in right away

        if ( totalDepth == 0 ) return map;

        int mq0 = 0; // number of reads that are actually mq0
        HashMap<String, Integer> alleleCounts = new HashMap<String, Integer>();
        Allele refAllele = vc.getReference();

        for ( Allele allele : vc.getAlternateAlleles() ) {
            if ( allele.isNoCall() ) {
                continue; // no-call alts are not counted; guarded below when reading back
            }
            alleleCounts.put(getAlleleRepresentation(allele), 0);
        }

        for ( ExtendedEventPileupElement e : pileup.toExtendedIterable() ) {
            if ( e.getMappingQual() == 0 ) {
                mq0++;
                continue;
            }
            if ( e.isInsertion() ) {
                final String b = e.getEventBases();
                if ( alleleCounts.containsKey(b) ) {
                    alleleCounts.put(b, alleleCounts.get(b)+1);
                }
            } else if ( e.isDeletion() ) {
                // Only count deletions whose length matches the deletion allele recorded in the VC.
                if ( e.getEventLength() == refAllele.length() && alleleCounts.containsKey(DEL) ) {
                    alleleCounts.put(DEL, alleleCounts.get(DEL)+1);
                }
            }
        }

        if ( mq0 == totalDepth ) return map;

        // BUGFIX: size the array by the alt-allele count and guard against alleles
        // that were skipped above (no-calls). The previous code sized the array by
        // alleleCounts.size() and unboxed a possibly-null count, which could throw
        // ArrayIndexOutOfBoundsException or NullPointerException when a no-call
        // alternate allele was present.
        String[] fracs = new String[vc.getAlternateAlleles().size()];
        for (int i = 0; i < vc.getAlternateAlleles().size(); i++) {
            final Integer count = alleleCounts.get(getAlleleRepresentation(vc.getAlternateAllele(i)));
            fracs[i] = String.format("%.3f", (count == null ? 0 : count)/(float)(totalDepth-mq0));
        }

        map.put(getKeyNames().get(1), fracs);
        return map;
    }

    // Maps an allele to its key in the count map: deletions (null wrt the ref)
    // share the DEL key; insertions are keyed by their inserted bases.
    private String getAlleleRepresentation(Allele allele) {
        if ( allele.isNull() ) { // deletion wrt the ref
            return DEL;
        } else { // insertion, pass actual bases
            return allele.getBaseString();
        }
    }

    public List<String> getKeyNames() { return Arrays.asList("DP","FA"); }

    public List<VCFFormatHeaderLine> getDescriptions() {
        return Arrays.asList(new VCFFormatHeaderLine(getKeyNames().get(0),
                                                     1,
                                                     VCFHeaderLineType.Integer,
                                                     "Total read depth per sample, including MQ0"),
                             new VCFFormatHeaderLine(getKeyNames().get(1),
                                                     VCFHeaderLineCount.UNBOUNDED,
                                                     VCFHeaderLineType.Float,
                                                     "Fractions of reads (excluding MQ0 from both ref and alt) supporting each reported alternative allele, per sample"));
    }
}

View File

@ -19,11 +19,8 @@ import java.util.LinkedHashMap;
import java.util.List;
/**
 * The phred-scaled p-value (u-based z-approximation) from the Mann-Whitney Rank Sum Test
 * for the distance from the end of the read for reads with the alternate allele
 * (if the alternate allele is only seen near the ends of reads, this is indicative of error).
 *
 * @author rpoplin
 */
public class ReadPosRankSumTest extends RankSumTest {
public List<String> getKeyNames() { return Arrays.asList("ReadPosRankSum"); }

View File

@ -15,8 +15,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* SB annotation value by depth of alt containing samples
*/
public class SBByDepth extends AnnotationByDepth {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {
@ -26,7 +27,7 @@ public class SBByDepth extends AnnotationByDepth {
if (!vc.hasAttribute(VCFConstants.STRAND_BIAS_KEY))
return null;
double sBias = Double.valueOf(vc.getAttributeAsString(VCFConstants.STRAND_BIAS_KEY));
double sBias = vc.getAttributeAsDouble(VCFConstants.STRAND_BIAS_KEY, -1);
final Map<String, Genotype> genotypes = vc.getGenotypes();
if ( genotypes == null || genotypes.size() == 0 )

View File

@ -41,7 +41,9 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* List all of the samples in the info field
*/
public class SampleList extends InfoFieldAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -24,7 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -46,134 +45,522 @@ import java.util.*;
* (http://snpeff.sourceforge.net/).
*
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
* output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
* and adds annotations on that effect.
*
* The possible biological effects and their associated impacts are defined in the class:
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
*
* @author David Roazen
*/
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
// SnpEff annotation key names:
public static final String GENE_ID_KEY = "GENE_ID";
public static final String GENE_NAME_KEY = "GENE_NAME";
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
public static final String EXON_ID_KEY = "EXON_ID";
public static final String EXON_RANK_KEY = "EXON_RANK";
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
public static final String EFFECT_KEY = "EFFECT";
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
public static final String CODON_NUM_KEY = "CODON_NUM";
public static final String CDS_SIZE_KEY = "CDS_SIZE";
private static Logger logger = Logger.getLogger(SnpEff.class);
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
// When we write the SnpEff version number and command line to the output VCF, we change
// the key name slightly so that the output VCF won't be confused in the future for an
// output file produced by SnpEff directly:
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF:
public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
// Key names for the INFO field annotations we will add to each record, along
// with parsing-related information:
public enum InfoFieldKey {
    // NOTE(review): a fieldIndex of -1 appears to mean the value is not read from
    // a positional EFF subfield (the effect name and functional class are derived
    // elsewhere) — the parsing code is not visible here; confirm against the parser.
    // Subfield index 5 is skipped (GENE_BIOTYPE=4 jumps to TRANSCRIPT_ID=6);
    // presumably that subfield is intentionally unused — verify.
    EFFECT_KEY            ("SNPEFF_EFFECT", -1),
    IMPACT_KEY            ("SNPEFF_IMPACT", 0),
    CODON_CHANGE_KEY      ("SNPEFF_CODON_CHANGE", 1),
    AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
    GENE_NAME_KEY         ("SNPEFF_GENE_NAME", 3),
    GENE_BIOTYPE_KEY      ("SNPEFF_GENE_BIOTYPE", 4),
    TRANSCRIPT_ID_KEY     ("SNPEFF_TRANSCRIPT_ID", 6),
    EXON_ID_KEY           ("SNPEFF_EXON_ID", 7),
    FUNCTIONAL_CLASS_KEY  ("SNPEFF_FUNCTIONAL_CLASS", -1);

    // Actual text of the key as written into the output VCF INFO field.
    private final String keyName;

    // Index within the effect metadata subfields from the SnpEff EFF annotation
    // where each key's associated value can be found during parsing.
    private final int fieldIndex;

    InfoFieldKey ( String keyName, int fieldIndex ) {
        this.keyName = keyName;
        this.fieldIndex = fieldIndex;
    }

    /** The INFO-field key text for this annotation. */
    public String getKeyName() {
        return keyName;
    }

    /** The EFF-subfield index this key's value is parsed from, or -1 (see note above). */
    public int getFieldIndex() {
        return fieldIndex;
    }
}
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
// are validated against this list.
public enum EffectType {
    // Each constant pairs the functional class it implies with a flag marking
    // whether the effect is a mere MODIFIER (non-impactful context annotation).

    // High-impact effects:
    FRAME_SHIFT (EffectFunctionalClass.NONE, false),
    STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
    START_LOST (EffectFunctionalClass.NONE, false),
    SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
    SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
    EXON_DELETED (EffectFunctionalClass.NONE, false),
    STOP_LOST (EffectFunctionalClass.NONE, false),

    // Moderate-impact effects:
    NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
    CODON_CHANGE (EffectFunctionalClass.NONE, false),
    CODON_INSERTION (EffectFunctionalClass.NONE, false),
    CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
    CODON_DELETION (EffectFunctionalClass.NONE, false),
    CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
    UTR_5_DELETED (EffectFunctionalClass.NONE, false),
    UTR_3_DELETED (EffectFunctionalClass.NONE, false),

    // Low-impact effects:
    SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
    SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
    // NOTE(review): NON_SYNONYMOUS_START classified as SILENT looks inconsistent
    // with its name — confirm this is intentional.
    NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
    SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
    NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
    START_GAINED (EffectFunctionalClass.NONE, false),

    // Modifiers:
    NONE (EffectFunctionalClass.NONE, true),
    CHROMOSOME (EffectFunctionalClass.NONE, true),
    INTERGENIC (EffectFunctionalClass.NONE, true),
    UPSTREAM (EffectFunctionalClass.NONE, true),
    UTR_5_PRIME (EffectFunctionalClass.NONE, true),
    CDS (EffectFunctionalClass.NONE, true),
    GENE (EffectFunctionalClass.NONE, true),
    TRANSCRIPT (EffectFunctionalClass.NONE, true),
    EXON (EffectFunctionalClass.NONE, true),
    INTRON (EffectFunctionalClass.NONE, true),
    UTR_3_PRIME (EffectFunctionalClass.NONE, true),
    DOWNSTREAM (EffectFunctionalClass.NONE, true),
    INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
    INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
    REGULATION (EffectFunctionalClass.NONE, true),
    CUSTOM (EffectFunctionalClass.NONE, true),
    WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);

    // Functional class implied by this effect (NONE when not applicable).
    private final EffectFunctionalClass functionalClass;

    // True when the effect only modifies/contextualizes, rather than impacts, the variant.
    private final boolean isModifier;

    EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
        this.functionalClass = functionalClass;
        this.isModifier = isModifier;
    }

    /** The functional class (SILENT/MISSENSE/NONSENSE/NONE) implied by this effect. */
    public EffectFunctionalClass getFunctionalClass() {
        return functionalClass;
    }

    /** True for MODIFIER-level (context-only) effects. */
    public boolean isModifier() {
        return isModifier;
    }
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
// classifying some of the LOW impact effects as MODIFIERs.
public enum EffectImpact {
    MODIFIER (0),
    LOW      (1),
    MODERATE (2),
    HIGH     (3);

    /** Relative severity; larger values are more severe. */
    private final int severity;

    EffectImpact ( final int severity ) {
        this.severity = severity;
    }

    /** True when this impact is strictly more severe than {@code other}. */
    public boolean isHigherImpactThan ( EffectImpact other ) {
        return Integer.compare(severity, other.severity) > 0;
    }

    /** True when this impact carries the same severity rating as {@code other}. */
    public boolean isSameImpactAs ( EffectImpact other ) {
        return Integer.compare(severity, other.severity) == 0;
    }
}
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
public enum EffectCoding {
    CODING,
    NON_CODING,
    UNKNOWN    // used when SnpEff omits the coding status entirely
}
// We assign a functional class to each SnpEff effect.
public enum EffectFunctionalClass {
    NONE     (0),
    SILENT   (1),
    MISSENSE (2),
    NONSENSE (3);

    /** Ordering value; larger means higher priority. */
    private final int rank;

    EffectFunctionalClass ( final int rank ) {
        this.rank = rank;
    }

    /** True when this functional class strictly outranks {@code other}. */
    public boolean isHigherPriorityThan ( EffectFunctionalClass other ) {
        return Integer.compare(rank, other.rank) > 0;
    }
}
/**
 * One-time setup: validates the SnpEff rod binding, verifies the SnpEff input
 * VCF carries a supported version and a command-line header entry, and copies
 * both entries (under renamed keys) into the output VCF header.
 *
 * @param walker      the walker providing the --snpEffFile rod binding
 * @param toolkit     engine used to fetch the rod's VCF header
 * @param headerLines output VCF header lines; mutated to include the renamed
 *                    SnpEff version/command entries
 */
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
    // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
    // without providing a SnpEff rod via --snpEffFile):
    validateRodBinding(walker.getSnpEffRodBinding());
    RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();

    // Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
    // the SnpEff rod, and that the file was generated by a supported version of SnpEff:
    VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
    VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
    VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
    checkSnpEffVersion(snpEffVersionLine);
    checkSnpEffCommandLine(snpEffCommandLine);

    // If everything looks ok, add the SnpEff version number and command-line header lines to the
    // header of the VCF output file, changing the key names so that our output file won't be
    // mistaken in the future for a SnpEff output file:
    headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
    headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
validateRodBinding(snpEffRodBinding);
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Get only SnpEff records that start at this locus, not merely span it:
List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Add only annotations for one of the most biologically-significant effects as defined in
// the SnpEffConstants class:
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
if ( mostSignificantEffect == null ) {
// Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
// If there is more than one such record, we only need to pick the first one, since the biological
// effects will be the same across all such records:
VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
if ( matchingRecord == null ) {
return null;
}
return generateAnnotations(mostSignificantEffect);
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) {
return null;
}
// Add only annotations for one of the most biologically-significant effects from this set:
SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
return mostSignificantEffect.getAnnotations();
}
private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
"as a rodbinding on the command line, but no SnpEff rodbinding was found.");
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found.");
}
}
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
SnpEffFeature mostSignificantEffect = null;
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
}
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
}
}
/**
 * Verifies that the SnpEff input file's VCF header records the SnpEff command line,
 * which every supported SnpEff version adds when generating output.
 *
 * @param snpEffCommandLine the header line keyed by SNPEFF_VCF_HEADER_COMMAND_LINE_KEY, or null if absent
 * @throws UserException when the header line is missing or its value is null/blank
 */
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
    final boolean commandLineAbsent = snpEffCommandLine == null
                                      || snpEffCommandLine.getValue() == null
                                      || snpEffCommandLine.getValue().trim().length() == 0;

    if ( commandLineAbsent ) {
        throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
                                "input file, which should be added by all supported versions of SnpEff (" +
                                Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
    }
}
/**
 * Tests whether the given SnpEff version string is one we support.
 *
 * @param versionString version token extracted from the SnpEff VCF header
 * @return true if versionString exactly matches an entry in SUPPORTED_SNPEFF_VERSIONS
 */
private boolean isSupportedSnpEffVersion ( String versionString ) {
    return Arrays.asList(SUPPORTED_SNPEFF_VERSIONS).contains(versionString);
}
/**
 * Finds the first SnpEff record whose ref and alt alleles match the variant being annotated.
 *
 * @param snpEffRecords candidate SnpEff records spanning the current locus
 * @param vc            the variant context being annotated
 * @return the first record with identical alternate alleles and reference allele, or null if none match
 */
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
    for ( VariantContext candidate : snpEffRecords ) {
        // Alt-allele comparison first, mirroring the short-circuit order of the original check:
        if ( ! candidate.hasSameAlternateAllelesAs(vc) ) {
            continue;
        }
        if ( candidate.getReference().equals(vc.getReference()) ) {
            return candidate;
        }
    }

    return null;
}
/**
 * Parses the SnpEff EFF INFO field of the given record into individual SnpEffEffect objects.
 * Malformed entries are logged and skipped rather than aborting the whole record.
 *
 * @param snpEffRecord a VariantContext read from the SnpEff output file
 * @return the well-formed effects parsed from the record's EFF field (empty if the field is absent)
 */
private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
    List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();

    Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
    if ( effectFieldValue == null ) {
        return parsedEffects;
    }

    // The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
    // We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
    List<String> individualEffects = effectFieldValue instanceof List
                                     ? (List<String>)effectFieldValue
                                     : Arrays.asList((String)effectFieldValue);

    for ( String effectString : individualEffects ) {
        String[] nameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);

        if ( nameAndMetadata.length != 2 ) {
            logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
                                      snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
            continue;
        }

        // Keep trailing empty subfields (limit -1) so field indices stay stable:
        String[] metadataSubfields = nameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
        SnpEffEffect candidate = new SnpEffEffect(nameAndMetadata[0], metadataSubfields);

        if ( candidate.isWellFormed() ) {
            parsedEffects.add(candidate);
        }
        else {
            logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
                                      snpEffRecord.getChr(), snpEffRecord.getStart(), candidate.getParseError(), effectString));
        }
    }

    return parsedEffects;
}
private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
SnpEffEffect mostSignificantEffect = null;
for ( SnpEffEffect effect : effects ) {
if ( mostSignificantEffect == null ||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
effect.isHigherImpactThan(mostSignificantEffect) ) {
mostSignificantEffect = snpEffFeature;
mostSignificantEffect = effect;
}
}
return mostSignificantEffect;
}
/**
 * Builds the INFO-field annotation map for the given most-significant effect.
 * Only annotations whose corresponding value is present on the effect are emitted;
 * insertion order (and therefore output order) follows the getKeyNames() layout.
 *
 * @param mostSignificantEffect the effect selected as most biologically significant
 * @return annotation key/value pairs for the VCF INFO field
 */
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
    Map<String, Object> result = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));

    if ( mostSignificantEffect.hasGeneID() ) {
        result.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
    }
    if ( mostSignificantEffect.hasGeneName() ) {
        result.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
    }
    if ( mostSignificantEffect.hasTranscriptID() ) {
        result.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
    }
    if ( mostSignificantEffect.hasExonID() ) {
        result.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
    }
    if ( mostSignificantEffect.hasExonRank() ) {
        result.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
    }
    // Flag-style annotation: presence of the key alone carries the information.
    if ( mostSignificantEffect.isNonCodingGene() ) {
        result.put(WITHIN_NON_CODING_GENE_KEY, null);
    }

    // Effect and impact are always present on an effect, so they are added unconditionally:
    result.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
    result.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());

    if ( mostSignificantEffect.hasEffectExtraInformation() ) {
        result.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
    }
    if ( mostSignificantEffect.hasOldAndNewAA() ) {
        result.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
    }
    if ( mostSignificantEffect.hasOldAndNewCodon() ) {
        result.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
    }
    if ( mostSignificantEffect.hasCodonNum() ) {
        result.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
    }
    if ( mostSignificantEffect.hasCdsSize() ) {
        result.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
    }

    return result;
}
public List<String> getKeyNames() {
return Arrays.asList( GENE_ID_KEY,
GENE_NAME_KEY,
TRANSCRIPT_ID_KEY,
EXON_ID_KEY,
EXON_RANK_KEY,
WITHIN_NON_CODING_GENE_KEY,
EFFECT_KEY,
EFFECT_IMPACT_KEY,
EFFECT_EXTRA_INFORMATION_KEY,
OLD_NEW_AA_KEY,
OLD_NEW_CODON_KEY,
CODON_NUM_KEY,
CDS_SIZE_KEY
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
);
}
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
);
}
/**
 * Helper class to parse, validate, and store a single SnpEff effect and its metadata.
 *
 * An instance is built from one entry of SnpEff's EFF INFO field: the effect name plus its
 * delimiter-separated metadata subfields. Parse failures never throw from the constructor;
 * instead the instance is marked not well-formed and the first error message is cached so
 * the caller can log it (see isWellFormed()/getParseError()).
 */
protected static class SnpEffEffect {
    private EffectType effect;            // parsed effect name; remains null if the name was unrecognized
    private EffectImpact impact;
    private String codonChange;
    private String aminoAcidChange;
    private String geneName;
    private String geneBiotype;
    private EffectCoding coding;          // CODING/NON_CODING per the metadata, or UNKNOWN when the field is blank
    private String transcriptID;
    private String exonID;

    private String parseError = null;     // first parse error encountered, or null if none
    private boolean isWellFormed = true;  // flipped to false by parseError()

    private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
    // SnpEff appends one extra trailing field when it emits a warning, and a further one for an error:
    private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
    private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;

    // Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
    // errors come after warnings, not vice versa:
    private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
    private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;

    // Position of the coding/non-coding subfield within the metadata array.
    private static final int SNPEFF_CODING_FIELD_INDEX = 5;

    /**
     * Parses one SnpEff effect entry.
     *
     * @param effectName     the effect name portion of the EFF entry
     * @param effectMetadata the delimiter-separated metadata subfields of the EFF entry
     */
    public SnpEffEffect ( String effectName, String[] effectMetadata ) {
        parseEffectName(effectName);
        parseEffectMetadata(effectMetadata);
    }

    // Maps the textual effect name onto the EffectType enum; records a parse error if unrecognized.
    private void parseEffectName ( String effectName ) {
        try {
            effect = EffectType.valueOf(effectName);
        }
        catch ( IllegalArgumentException e ) {
            parseError(String.format("%s is not a recognized effect type", effectName));
        }
    }

    // Validates the metadata field count, then extracts each subfield at the index declared by
    // InfoFieldKey. Extra trailing fields carry a SnpEff warning/error, which we surface as a
    // parse error and stop (the remaining fields are left unparsed).
    private void parseEffectMetadata ( String[] effectMetadata ) {
        if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
            if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
                parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
            }
            else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
                parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
            }
            else {
                parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
                                         EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
            }
            return;
        }

        // Modifier-class effects carry an implicit MODIFIER impact rather than an explicit value:
        if ( effect != null && effect.isModifier() ) {
            impact = EffectImpact.MODIFIER;
        }
        else {
            try {
                impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
            }
            catch ( IllegalArgumentException e ) {
                parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
            }
        }

        codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
        aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
        geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
        geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];

        // The coding field may legitimately be blank, in which case coding status is UNKNOWN:
        if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
            try {
                coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
            }
            catch ( IllegalArgumentException e ) {
                parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
            }
        }
        else {
            coding = EffectCoding.UNKNOWN;
        }

        transcriptID = effectMetadata[InfoFieldKey.TRANSCRIPT_ID_KEY.getFieldIndex()];
        exonID = effectMetadata[InfoFieldKey.EXON_ID_KEY.getFieldIndex()];
    }

    // Marks this effect as malformed and records the error message.
    private void parseError ( String message ) {
        isWellFormed = false;

        // Cache only the first error encountered:
        if ( parseError == null ) {
            parseError = message;
        }
    }

    /** @return true if both the effect name and its metadata parsed without error */
    public boolean isWellFormed() {
        return isWellFormed;
    }

    /** @return the first parse error encountered, or the empty string if there was none */
    public String getParseError() {
        return parseError == null ? "" : parseError;
    }

    /** @return true if this effect is within a coding gene (an UNKNOWN coding status counts as non-coding) */
    public boolean isCoding() {
        return coding == EffectCoding.CODING;
    }

    /**
     * Ranks this effect's biological significance against another parsed effect.
     * Coding-gene effects always outrank non-coding ones; otherwise impacts are
     * compared, with ties broken by the effect's functional class.
     *
     * NOTE(review): assumes both effects are well-formed (non-null effect/impact) —
     * only well-formed effects should be compared; verify against callers.
     *
     * @param other the effect to compare against
     * @return true if this effect is strictly higher-impact than other
     */
    public boolean isHigherImpactThan ( SnpEffEffect other ) {
        // If one effect is within a coding gene and the other is not, the effect that is
        // within the coding gene has higher impact:
        if ( isCoding() && ! other.isCoding() ) {
            return true;
        }
        else if ( ! isCoding() && other.isCoding() ) {
            return false;
        }

        // Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
        // of the effects themselves. Effects with the same impact are tie-broken using the
        // functional class of the effect:
        if ( impact.isHigherImpactThan(other.impact) ) {
            return true;
        }
        else if ( impact.isSameImpactAs(other.impact) ) {
            return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
        }

        return false;
    }

    /**
     * Converts this effect into INFO-field annotations keyed by the InfoFieldKey names.
     * Keys whose values are null or blank are omitted; insertion order (a LinkedHashMap)
     * fixes the output order of the annotations.
     *
     * @return annotation key/value pairs for this effect
     */
    public Map<String, Object> getAnnotations() {
        Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));

        addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
        addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
        addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
        addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
        addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
        addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
        addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
        addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
        addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());

        return annotations;
    }

    // Only add annotations for keys associated with non-empty values:
    private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
        if ( keyValue != null && keyValue.trim().length() > 0 ) {
            annotations.put(keyName, keyValue);
        }
    }
}
}

View File

@ -17,6 +17,9 @@ import java.util.List;
import java.util.Map;
/**
* Fraction of reads containing spanning deletions at this site.
*/
public class SpanningDeletions extends InfoFieldAnnotation implements StandardAnnotation {
public Map<String, Object> annotate(RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc) {

View File

@ -1,5 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -19,12 +20,9 @@ import java.util.List;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: delangel
* Date: 6/29/11
* Time: 3:14 PM
* To change this template use File | Settings | File Templates.
* Counts of bases from SLX, 454, and SOLiD at this site
*/
@Hidden
public class TechnologyComposition extends InfoFieldAnnotation implements ExperimentalAnnotation {
private String nSLX = "NumSLX";
private String n454 ="Num454";

View File

@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -86,14 +85,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@ArgumentCollection
protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
public RodBinding<VariantContext> getVariantRodBinding() { return variantCollection.variants; }
/**
* The INFO field will be annotated with information on the most biologically-significant effect
* listed in the SnpEff output file for each variant.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
public RodBinding<SnpEffFeature> snpEffFile;
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
public RodBinding<VariantContext> snpEffFile;
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
@ -162,6 +162,12 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
@Argument(fullName="vcfContainsOnlyIndels", shortName="dels",doc="Use if you are annotating an indel vcf, currently VERY experimental", required = false)
protected boolean indelsOnly = false;
@Argument(fullName="family_string",shortName="family",required=false,doc="A family string of the form mom+dad=child for use with the mendelian violation ratio annotation")
public String familyStr = null;
@Argument(fullName="MendelViolationGenotypeQualityThreshold",shortName="mvq",required=false,doc="The genotype quality treshold in order to annotate mendelian violation ratio")
public double minGenotypeQualityP = 0.0;
private VariantAnnotatorEngine engine;
private Collection<VariantContext> indelBufferContext;
@ -203,9 +209,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
}
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(this);
engine = new VariantAnnotatorEngine(this, getToolkit());
else
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
// setup the header fields
@ -217,6 +223,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
hInfo.add(line);
}
engine.invokeAnnotationInitializationMethods(hInfo);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -26,13 +26,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotationInterfaceManager;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.GenotypeAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.*;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
@ -49,6 +47,7 @@ public class VariantAnnotatorEngine {
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit;
private static class VAExpression {
@ -74,16 +73,18 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
initializeDBs();
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeDBs();
}
@ -113,6 +114,16 @@ public class VariantAnnotatorEngine {
dbAnnotations.put(rod, rod.getName());
}
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker, toolkit, headerLines);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker, toolkit, headerLines);
}
}
public Set<VCFHeaderLine> getVCFAnnotationDescriptions() {
Set<VCFHeaderLine> descriptions = new HashSet<VCFHeaderLine>();

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
@ -9,8 +8,9 @@ import java.util.List;
public interface AnnotatorCompatibleWalker {
// getter methods for various used bindings
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getVariantRodBinding();
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
}
}

View File

@ -24,18 +24,18 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
import java.util.Map;
import java.util.Set;
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
public abstract class VariantAnnotatorAnnotation {
// return the INFO keys
public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
}

View File

@ -175,21 +175,16 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
}
BeagleFeature beagleR2Feature = tracker.getFirstValue(beagleR2);
// ignore places where we don't have a variant
if ( beagleR2Feature == null )
return 0;
BeagleFeature beagleProbsFeature = tracker.getFirstValue(beagleProbs);
// ignore places where we don't have a variant
if ( beagleProbsFeature == null )
return 0;
BeagleFeature beaglePhasedFeature = tracker.getFirstValue(beaglePhased);
// ignore places where we don't have a variant
if ( beaglePhasedFeature == null )
return 0;
if ( beagleR2Feature == null || beagleProbsFeature == null || beaglePhasedFeature == null)
{
vcfWriter.add(vc_input);
return 1;
}
// get reference base for current position
byte refByte = ref.getBase();

View File

@ -63,20 +63,32 @@ import java.util.*;
* <h2>Input</h2>
* <p>
* One or more bam files (with proper headers) to be analyzed for coverage statistics
* (Optional) A REFSEQ Rod to aggregate coverage to the gene level
* </p>
*
* <p>
*(Optional) A REFSEQ Rod to aggregate coverage to the gene level
* <p>
* (for information about creating the REFSEQ Rod, please consult the RefSeqCodec documentation)
*</p></p>
* <h2>Output</h2>
* <p>
* Tables pertaining to different coverage summaries. Suffix on the table files declares the contents:
* </p><p>
* - no suffix: per locus coverage
* </p><p>
* - _summary: total, mean, median, quartiles, and threshold proportions, aggregated over all bases
* </p><p>
* - _statistics: coverage histograms (# locus with X coverage), aggregated over all bases
* </p><p>
* - _interval_summary: total, mean, median, quartiles, and threshold proportions, aggregated per interval
* </p><p>
* - _interval_statistics: 2x2 table of # of intervals covered to >= X depth in >=Y samples
* </p><p>
* - _gene_summary: total, mean, median, quartiles, and threshold proportions, aggregated per gene
* </p><p>
* - _gene_statistics: 2x2 table of # of genes covered to >= X depth in >= Y samples
* </p><p>
* - _cumulative_coverage_counts: coverage histograms (# locus with >= X coverage), aggregated over all bases
* </p><p>
* - _cumulative_coverage_proportions: proprotions of loci with >= X coverage, aggregated over all bases
* </p>
*
@ -84,7 +96,7 @@ import java.util.*;
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T VariantEval \
* -T DepthOfCoverage \
* -o file_name_base \
* -I input_bams.list
* [-geneList refSeq.sorted.txt] \

View File

@ -43,8 +43,10 @@ import java.util.List;
* Generates an alternative reference sequence over the specified interval.
*
* <p>
* Given variant ROD tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for a "snpmask" ROD to set overlapping bases to 'N'.
* Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
* Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
* Note that if there are multiple variants at a site, it takes the first one seen.
* Reference bases for each interval will be output as a separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
* <p>

View File

@ -42,6 +42,9 @@ import java.io.PrintStream;
*
* <p>
* The output format can be partially controlled using the provided command-line arguments.
* Specify intervals with the usual -L argument to output only the reference bases within your intervals.
* Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
* separate fasta sequence (named numerically in order).
*
* <h2>Input</h2>
* <p>

View File

@ -23,7 +23,7 @@
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.genotype;
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.BaseUtils;
@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
public enum DiploidGenotype {
enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
AG ('A', 'G'),

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
/**
* Created by IntelliJ IDEA.

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.pileup.FragmentPileup;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@ -276,8 +275,11 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
if ( elt.isReducedRead() ) {
// reduced read representation
byte qual = elt.getReducedQual();
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
if ( BaseUtils.isRegularBase( elt.getBase() )) {
add(obsBase, qual, (byte)0, (byte)0, elt.getReducedCount()); // fast calculation of n identical likelihoods
return elt.getReducedCount(); // we added nObs bases here
} else // odd bases or deletions => don't use them
return 0;
} else {
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import java.util.Arrays;

View File

@ -48,27 +48,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// code for testing purposes
//
private final static boolean DEBUG = false;
private final static boolean PRINT_LIKELIHOODS = false;
private final static int N_CYCLES = 1;
private SimpleTimer timerExpt = new SimpleTimer("linearExactBanded");
private SimpleTimer timerGS = new SimpleTimer("linearExactGS");
private final static boolean COMPARE_TO_GS = false;
public enum ExactCalculation {
N2_GOLD_STANDARD,
LINEAR_EXPERIMENTAL
}
private final static double MAX_LOG10_ERROR_TO_STOP_EARLY = 6; // we want the calculation to be accurate to 1 / 10^6
private final boolean SIMPLE_GREEDY_GENOTYPER = false;
private final static double SUM_GL_THRESH_NOCALL = -0.001; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
private boolean SIMPLE_GREEDY_GENOTYPER = false;
final private ExactCalculation calcToUse;
protected ExactAFCalculationModel(UnifiedArgumentCollection UAC, int N, Logger logger, PrintStream verboseWriter) {
super(UAC, N, logger, verboseWriter);
calcToUse = UAC.EXACT_CALCULATION_TYPE;
}
public void getLog10PNonRef(RefMetaDataTracker tracker,
@ -76,43 +61,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Map<String, Genotype> GLs, Set<Allele>alleles,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
double[] gsPosteriors;
if ( COMPARE_TO_GS ) // due to annoying special values in incoming array, we have to clone up here
gsPosteriors = log10AlleleFrequencyPosteriors.clone();
int idxAA = GenotypeType.AA.ordinal();
int idxAB = GenotypeType.AB.ordinal();
int idxBB = GenotypeType.BB.ordinal();
// todo -- remove me after testing
if ( N_CYCLES > 1 ) {
for ( int i = 0; i < N_CYCLES; i++) {
timerGS.restart();
linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone(), idxAA, idxAB, idxBB);
timerGS.stop();
timerExpt.restart();
linearExactBanded(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.clone());
timerExpt.stop();
}
System.out.printf("good = %.2f, expt = %.2f, delta = %.2f%n",
timerGS.getElapsedTime(), timerExpt.getElapsedTime(), timerExpt.getElapsedTime()-timerGS.getElapsedTime());
}
int lastK = -1;
int numAlleles = alleles.size();
final int numAlleles = alleles.size();
final double[][] posteriorCache = numAlleles > 2 ? new double[numAlleles-1][] : null;
final double[] bestAFguess = numAlleles > 2 ? new double[numAlleles-1] : null;
int idxDiag = numAlleles;
int incr = numAlleles - 1;
double[][] posteriorCache = new double[numAlleles-1][];
double[] bestAFguess = new double[numAlleles-1];
for (int k=1; k < numAlleles; k++) {
// multi-allelic approximation, part 1: Ideally
// for each alt allele compute marginal (suboptimal) posteriors -
@ -121,24 +75,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
// So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC.
// 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD
idxAA = 0;
idxAB = k;
final int idxAA = 0;
final int idxAB = k;
// yy is always element on the diagonal.
// 2 alleles: BBelement 2
// 3 alleles: BB element 3. CC element 5
// 4 alleles:
idxBB = idxDiag;
final int idxBB = idxDiag;
idxDiag += incr--;
// todo - possible cleanup
switch ( calcToUse ) {
case N2_GOLD_STANDARD:
lastK = gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
case LINEAR_EXPERIMENTAL:
lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
break;
}
final int lastK = linearExact(GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, idxAA, idxAB, idxBB);
if (numAlleles > 2) {
posteriorCache[k-1] = log10AlleleFrequencyPosteriors.clone();
bestAFguess[k-1] = (double)MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors);
@ -153,47 +100,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
log10AlleleFrequencyPosteriors[k] = (posteriorCache[mostLikelyAlleleIdx][k]);
}
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
// todo -- REMOVE ME AFTER TESTING
if ( COMPARE_TO_GS ) {
gdaN2GoldStandard(GLs, log10AlleleFrequencyPriors, gsPosteriors, idxAA, idxAB, idxBB);
double log10thisPVar = Math.log10(MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors)[0]);
double log10gsPVar = Math.log10(MathUtils.normalizeFromLog10(gsPosteriors)[0]);
boolean eq = (log10thisPVar == Double.NEGATIVE_INFINITY && log10gsPVar == Double.NEGATIVE_INFINITY) || MathUtils.compareDoubles(log10thisPVar, log10gsPVar, 1e-4) == 0;
if ( ! eq || PRINT_LIKELIHOODS ) {
System.out.printf("----------------------------------------%n");
for (int k=0; k < log10AlleleFrequencyPosteriors.length; k++) {
double x = log10AlleleFrequencyPosteriors[k];
System.out.printf(" %d\t%.2f\t%.2f\t%b%n", k,
x < -1e10 ? Double.NEGATIVE_INFINITY : x, gsPosteriors[k],
log10AlleleFrequencyPosteriors[k] == gsPosteriors[k]);
}
System.out.printf("MAD_AC\t%d\t%d\t%.2f\t%.2f\t%.6f%n",
ref.getLocus().getStart(), lastK, log10thisPVar, log10gsPVar, log10thisPVar - log10gsPVar);
}
}
}
private static final double[][] getGLs(Map<String, Genotype> GLs) {
double[][] genotypeLikelihoods = new double[GLs.size()+1][];
private static final ArrayList<double[]> getGLs(Map<String, Genotype> GLs) {
ArrayList<double[]> genotypeLikelihoods = new ArrayList<double[]>();
int j = 0;
genotypeLikelihoods.add(new double[]{0.0,0.0,0.0}); // dummy
for ( Genotype sample : GLs.values() ) {
j++;
if ( sample.hasLikelihoods() ) {
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
genotypeLikelihoods[j] = sample.getLikelihoods().getAsVector();
double[] gls = sample.getLikelihoods().getAsVector();
if (MathUtils.sum(gls) < SUM_GL_THRESH_NOCALL)
genotypeLikelihoods.add(gls);
}
}
return genotypeLikelihoods;
}
// -------------------------------------------------------------------------------------
//
// Linearized, ~O(N), implementation.
@ -237,90 +162,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
}
// now with banding
public int linearExactBanded(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors) {
throw new NotImplementedException();
// final int numSamples = GLs.size();
// final int numChr = 2*numSamples;
// final double[][] genotypeLikelihoods = getGLs(GLs);
//
// final ExactACCache logY = new ExactACCache(numSamples+1);
// logY.getkMinus0()[0] = 0.0; // the zero case
//
// double maxLog10L = Double.NEGATIVE_INFINITY;
// boolean done = false;
// int lastK = -1;
// final int BAND_SIZE = 10;
//
// for (int k=0; k <= numChr && ! done; k++ ) {
// final double[] kMinus0 = logY.getkMinus0();
// int jStart = Math.max(k - BAND_SIZE, 1);
// int jStop = Math.min(k + BAND_SIZE, numSamples);
//
// if ( k == 0 ) { // special case for k = 0
// for ( int j=1; j <= numSamples; j++ ) {
// kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][GenotypeType.AA.ordinal()];
// }
// } else { // k > 0
// final double[] kMinus1 = logY.getkMinus1();
// final double[] kMinus2 = logY.getkMinus2();
// Arrays.fill(kMinus0,0);
//
// for ( int j = jStart; j <= jStop; j++ ) {
// final double[] gl = genotypeLikelihoods[j];
// final double logDenominator = log10Cache[2*j] + log10Cache[2*j-1];
//
// double aa = Double.NEGATIVE_INFINITY;
// double ab = Double.NEGATIVE_INFINITY;
// if (k < 2*j-1)
// aa = log10Cache[2*j-k] + log10Cache[2*j-k-1] + kMinus0[j-1] + gl[GenotypeType.AA.ordinal()];
//
// if (k < 2*j)
// ab = log10Cache[2*k] + log10Cache[2*j-k]+ kMinus1[j-1] + gl[GenotypeType.AB.ordinal()];
//
// double log10Max;
// if (k > 1) {
// final double bb = log10Cache[k] + log10Cache[k-1] + kMinus2[j-1] + gl[GenotypeType.BB.ordinal()];
// log10Max = approximateLog10SumLog10(aa, ab, bb);
// } else {
// // we know we aren't considering the BB case, so we can use an optimized log10 function
// log10Max = approximateLog10SumLog10(aa, ab);
// }
//
// // finally, update the L(j,k) value
// kMinus0[j] = log10Max - logDenominator;
//
// String offset = Utils.dupString(' ',k);
// System.out.printf("%s%3d %3d %.2f%n", offset, k, j, kMinus0[j]);
// }
// }
//
// // update the posteriors vector
// final double log10LofK = kMinus0[jStop];
// log10AlleleFrequencyPosteriors[k] = log10LofK + log10AlleleFrequencyPriors[k];
//
// // can we abort early?
// lastK = k;
// maxLog10L = Math.max(maxLog10L, log10LofK);
// if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
// if ( DEBUG ) System.out.printf(" *** breaking early k=%d log10L=%.2f maxLog10L=%.2f%n", k, log10LofK, maxLog10L);
// done = true;
// }
//
// logY.rotate();
// }
//
// return lastK;
}
public int linearExact(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
final int numSamples = GLs.size();
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
final int numSamples = genotypeLikelihoods.size()-1;
final int numChr = 2*numSamples;
final double[][] genotypeLikelihoods = getGLs(GLs);
final ExactACCache logY = new ExactACCache(numSamples+1);
logY.getkMinus0()[0] = 0.0; // the zero case
@ -334,14 +181,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( k == 0 ) { // special case for k = 0
for ( int j=1; j <= numSamples; j++ ) {
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods[j][idxAA];
kMinus0[j] = kMinus0[j-1] + genotypeLikelihoods.get(j)[idxAA];
}
} else { // k > 0
final double[] kMinus1 = logY.getkMinus1();
final double[] kMinus2 = logY.getkMinus2();
for ( int j=1; j <= numSamples; j++ ) {
final double[] gl = genotypeLikelihoods[j];
final double[] gl = genotypeLikelihoods.get(j);
final double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
double aa = Double.NEGATIVE_INFINITY;
@ -434,10 +281,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
if ( !vc.isVariant() )
throw new UserException("The VCF record passed in does not contain an ALT allele at " + vc.getChr() + ":" + vc.getStart());
boolean multiAllelicRecord = false;
if (vc.getAlternateAlleles().size() > 1)
multiAllelicRecord = true;
Map<String, Genotype> GLs = vc.getGenotypes();
double[][] pathMetricArray = new double[GLs.size()+1][AFofMaxLikelihood+1];
@ -454,7 +297,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
pathMetricArray[0][0] = 0.0;
// todo = can't deal with optimal dynamic programming solution with multiallelic records
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord) {
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
sampleIndices.addAll(GLs.keySet());
sampleIdx = GLs.size();
}
@ -465,6 +308,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
continue;
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
if (MathUtils.sum(likelihoods) > SUM_GL_THRESH_NOCALL) {
//System.out.print(sample.getKey()+":");
//for (int k=0; k < likelihoods.length; k++)
// System.out.format("%4.2f ",likelihoods[k]);
//System.out.println();
// all likelihoods are essentially the same: skip this sample and will later on force no call.
//sampleIdx++;
continue;
}
sampleIndices.add(sample.getKey());
for (int k=0; k <= AFofMaxLikelihood; k++) {
@ -504,22 +358,25 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
Genotype g = GLs.get(sample);
if ( !g.hasLikelihoods() )
continue;
if (SIMPLE_GREEDY_GENOTYPER || multiAllelicRecord)
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
else {
int newIdx = tracebackArray[k][startIdx];
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
// if all likelihoods are essentially the same: we want to force no-call. In this case, we skip this sample for now,
// and will add no-call genotype to GL's in a second pass
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double qual = Double.NEGATIVE_INFINITY;
double[] likelihoods = g.getLikelihoods().getAsVector();
if (SIMPLE_GREEDY_GENOTYPER || !vc.isBiallelic()) {
bestGTguess = Utils.findIndexOfMaxEntry(g.getLikelihoods().getAsVector());
}
else {
int newIdx = tracebackArray[k][startIdx];;
bestGTguess = startIdx - newIdx;
startIdx = newIdx;
}
/* System.out.format("Sample: %s GL:",sample);
for (int i=0; i < likelihoods.length; i++)
System.out.format("%1.4f ",likelihoods[i]);
System.out.format("%1.4f, ",likelihoods[i]);
*/
for (int i=0; i < likelihoods.length; i++) {
@ -570,83 +427,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
}
return calls;
}
// -------------------------------------------------------------------------------------
//
// Gold standard, but O(N^2), implementation.
//
// TODO -- remove me for clarity in this code
//
// -------------------------------------------------------------------------------------
public int gdaN2GoldStandard(Map<String, Genotype> GLs,
double[] log10AlleleFrequencyPriors,
double[] log10AlleleFrequencyPosteriors, int idxAA, int idxAB, int idxBB) {
int numSamples = GLs.size();
int numChr = 2*numSamples;
double[][] logYMatrix = new double[1+numSamples][1+numChr];
for (int i=0; i <=numSamples; i++)
for (int j=0; j <=numChr; j++)
logYMatrix[i][j] = Double.NEGATIVE_INFINITY;
//YMatrix[0][0] = 1.0;
logYMatrix[0][0] = 0.0;
int j=0;
for ( Map.Entry<String, Genotype> sample : GLs.entrySet() ) {
j++;
if ( !sample.getValue().hasLikelihoods() )
continue;
Genotype g = GLs.get(sample.getKey());
//double[] genotypeLikelihoods = MathUtils.normalizeFromLog10(GLs.get(sample).getLikelihoods());
double[] genotypeLikelihoods = sample.getValue().getLikelihoods().getAsVector();
//double logDenominator = Math.log10(2.0*j*(2.0*j-1));
double logDenominator = MathUtils.log10Cache[2*j] + MathUtils.log10Cache[2*j-1];
double[] likelihoods = sample.getValue().getLikelihoods().getAsVector();
// special treatment for k=0: iteration reduces to:
//YMatrix[j][0] = YMatrix[j-1][0]*genotypeLikelihoods[GenotypeType.AA.ordinal()];
logYMatrix[j][0] = logYMatrix[j-1][0] + genotypeLikelihoods[idxAA];
if (MathUtils.sum(likelihoods) <= SUM_GL_THRESH_NOCALL)
continue; // regular likelihoods
for (int k=1; k <= 2*j; k++ ) {
//double num = (2.0*j-k)*(2.0*j-k-1)*YMatrix[j-1][k] * genotypeLikelihoods[GenotypeType.AA.ordinal()];
double logNumerator[];
logNumerator = new double[3];
if (k < 2*j-1)
logNumerator[0] = MathUtils.log10Cache[2*j-k] + MathUtils.log10Cache[2*j-k-1] + logYMatrix[j-1][k] +
genotypeLikelihoods[idxAA];
else
logNumerator[0] = Double.NEGATIVE_INFINITY;
if (k < 2*j)
logNumerator[1] = MathUtils.log10Cache[2*k] + MathUtils.log10Cache[2*j-k]+ logYMatrix[j-1][k-1] +
genotypeLikelihoods[idxAB];
else
logNumerator[1] = Double.NEGATIVE_INFINITY;
if (k > 1)
logNumerator[2] = MathUtils.log10Cache[k] + MathUtils.log10Cache[k-1] + logYMatrix[j-1][k-2] +
genotypeLikelihoods[idxBB];
else
logNumerator[2] = Double.NEGATIVE_INFINITY;
double logNum = MathUtils.softMax(logNumerator);
//YMatrix[j][k] = num/den;
logYMatrix[j][k] = logNum - logDenominator;
}
ArrayList<Allele> myAlleles = new ArrayList<Allele>();
double qual = Genotype.NO_NEG_LOG_10PERROR;
myAlleles.add(Allele.NO_CALL);
myAlleles.add(Allele.NO_CALL);
//System.out.println(myAlleles.toString());
calls.put(sample.getKey(), new Genotype(sample.getKey(), myAlleles, qual, null, g.getAttributes(), false));
}
for (int k=0; k <= numChr; k++)
log10AlleleFrequencyPosteriors[k] = logYMatrix[j][k] + log10AlleleFrequencyPriors[k];
return numChr;
return calls;
}
private final static void printLikelihoods(int numChr, double[][] logYMatrix, double[] log10AlleleFrequencyPriors) {
@ -657,5 +457,4 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
System.out.printf(" %4d\t%8.2f\t%8.2f\t%8.2f%n", k, logYMatrix[j][k], log10AlleleFrequencyPriors[k], posterior);
}
}
}

View File

@ -32,10 +32,11 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.indels.HaplotypeIndelErrorModel;
import org.broadinstitute.sting.gatk.walkers.indels.PairHMMIndelErrorModel;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
@ -70,9 +71,6 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
// gdebug removeme
// todo -cleanup
private HaplotypeIndelErrorModel model;
private boolean useOldWrongHorribleHackedUpLikelihoodModel = false;
//
private GenomeLoc lastSiteVisited;
private ArrayList<Allele> alleleList;
@ -83,26 +81,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
protected IndelGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) {
super(UAC, logger);
if (UAC.GSA_PRODUCTION_ONLY == false) {
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
useOldWrongHorribleHackedUpLikelihoodModel = false;
}
else {
useOldWrongHorribleHackedUpLikelihoodModel = true;
double INSERTION_START_PROBABILITY = 1e-3;
double INSERTION_END_PROBABILITY = 0.5;
double ALPHA_DELETION_PROBABILITY = 1e-3;
model = new HaplotypeIndelErrorModel(3, INSERTION_START_PROBABILITY,
INSERTION_END_PROBABILITY,ALPHA_DELETION_PROBABILITY,UAC.INDEL_HAPLOTYPE_SIZE, false, UAC.OUTPUT_DEBUG_INDEL_INFO);
}
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,
UAC.OUTPUT_DEBUG_INDEL_INFO, UAC.DO_CONTEXT_DEPENDENT_PENALTIES, UAC.dovit, UAC.GET_GAP_PENALTIES_FROM_DATA, UAC.INDEL_RECAL_FILE);
pairModel = new PairHMMIndelErrorModel(UAC.INDEL_GAP_OPEN_PENALTY,UAC.INDEL_GAP_CONTINUATION_PENALTY,UAC.OUTPUT_DEBUG_INDEL_INFO);
alleleList = new ArrayList<Allele>();
getAlleleListFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
minIndelCountForGenotyping = UAC.MIN_INDEL_COUNT_FOR_GENOTYPING;
@ -321,7 +300,7 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
haplotypeMap.clear();
if (getAlleleListFromVCF) {
for( final VariantContext vc_input : tracker.getValues(UAC.alleles) ) {
for( final VariantContext vc_input : tracker.getValues(UAC.alleles, loc) ) {
if( vc_input != null &&
allowableTypes.contains(vc_input.getType()) &&
ref.getLocus().getStart() == vc_input.getStart()) {
@ -382,20 +361,17 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
}
}
}
int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
if (useOldWrongHorribleHackedUpLikelihoodModel) {
numPrefBases = 20;
hsize=80;
}
final int eventLength = altAllele.getBaseString().length() - refAllele.getBaseString().length();
final int hsize = (int)ref.getWindow().size()-Math.abs(eventLength)-1;
final int numPrefBases= ref.getLocus().getStart()-ref.getWindow().getStart()+1;
if (DEBUG)
System.out.format("hsize: %d eventLength: %d refSize: %d, locStart: %d numpr: %d\n",hsize,eventLength,
(int)ref.getWindow().size(), loc.getStart(), numPrefBases);
//System.out.println(eventLength);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles( alleleList, loc.getStart(),
ref, hsize, numPrefBases);
haplotypeMap = Haplotype.makeHaplotypeListFromAlleles(alleleList, loc.getStart(),
ref, hsize, numPrefBases);
// For each sample, get genotype likelihoods based on pileup
// compute prior likelihoods on haplotypes, and initialize haplotype likelihood matrix with them.
@ -412,17 +388,9 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
pileup = context.getBasePileup();
if (pileup != null ) {
double[] genotypeLikelihoods;
if (useOldWrongHorribleHackedUpLikelihoodModel)
genotypeLikelihoods = model.computeReadHaplotypeLikelihoods( pileup, haplotypeMap);
else
genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
final double[] genotypeLikelihoods = pairModel.computeReadHaplotypeLikelihoods( pileup, haplotypeMap, ref, eventLength, getIndelLikelihoodMap());
// which genotype likelihoods correspond to two most likely alleles? By convention, likelihood vector is ordered as for example
// for 3 alleles it's 00 01 11 02 12 22
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
alleleList,
genotypeLikelihoods,
getFilteredDepth(pileup)));
@ -444,4 +412,16 @@ public class IndelGenotypeLikelihoodsCalculationModel extends GenotypeLikelihood
return indelLikelihoodMap.get();
}
// Overload function in GenotypeLikelihoodsCalculationModel so that, for an indel case, we consider a deletion as part of the pileup,
// so that per-sample DP will include deletions covering the event.
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if (p.isDeletion() || BaseUtils.isRegularBase(p.getBase()) )
count++;
}
return count;
}
}

View File

@ -26,16 +26,14 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.genotype.DiploidGenotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@ -58,25 +56,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES;
}
public static VariantContext getSNPVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
if ( tracker == null || ref == null || logger == null )
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);
VariantContext vc = null;
// search for usable record
for( final VariantContext vc_input : tracker.getValues(allelesBinding) ) {
if ( vc_input != null && ! vc_input.isFiltered() && (! requireSNP || vc_input.isSNP() )) {
if ( vc == null ) {
vc = vc_input;
} else {
logger.warn("Multiple valid VCF records detected at site " + ref.getLocus() + ", only considering alleles from first record");
}
}
}
return vc;
}
public Allele getLikelihoods(RefMetaDataTracker tracker,
ReferenceContext ref,
Map<String, AlignmentContext> contexts,
@ -96,7 +75,7 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
if ( alternateAlleleToUse != null ) {
bestAlternateAllele = alternateAlleleToUse.getBases()[0];
} else if ( useAlleleFromVCF ) {
VariantContext vc = getSNPVCFromAllelesRod(tracker, ref, true, logger, UAC.alleles);
VariantContext vc = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, ref.getLocus(), true, logger, UAC.alleles);
// ignore places where we don't have a variant
if ( vc == null )
@ -143,8 +122,10 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC
aList.add(refAllele);
aList.add(altAllele);
double[] dlike = new double[]{likelihoods[refGenotype.ordinal()],likelihoods[hetGenotype.ordinal()],likelihoods[homGenotype.ordinal()]} ;
// normalize in log space so that max element is zero.
GLs.put(sample.getKey(), new MultiallelicGenotypeLikelihoods(sample.getKey(),
aList, dlike, getFilteredDepth(pileup)));
aList, MathUtils.normalizeFromLog10(dlike, false, true), getFilteredDepth(pileup)));
}
return refAllele;

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.utils.SampleUtils;

View File

@ -143,35 +143,21 @@ public class UnifiedArgumentCollection {
@Hidden
@Argument(fullName = "indelHaplotypeSize", shortName = "indelHSize", doc = "Indel haplotype size", required = false)
public int INDEL_HAPLOTYPE_SIZE = 80;
@Hidden
@Argument(fullName = "doContextDependentGapPenalties", shortName = "doCDP", doc = "Vary gap penalties by context", required = false)
public boolean DO_CONTEXT_DEPENDENT_PENALTIES = true;
//gdebug+
// experimental arguments, NOT TO BE USED BY ANYONE WHOSE INITIALS AREN'T GDA!!!
@Hidden
@Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
public boolean GET_GAP_PENALTIES_FROM_DATA = false;
@Hidden
@Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
// @Hidden
// @Argument(fullName = "getGapPenaltiesFromData", shortName = "dataGP", doc = "Vary gap penalties by context - EXPERIMENTAL, DO NO USE", required = false)
// public boolean GET_GAP_PENALTIES_FROM_DATA = false;
//
// @Hidden
// @Argument(fullName="indel_recal_file", shortName="recalFile", required=false, doc="Filename for the input covariates table recalibration .csv file - EXPERIMENTAL, DO NO USE")
// public File INDEL_RECAL_FILE = new File("indel.recal_data.csv");
@Hidden
@Argument(fullName = "indelDebug", shortName = "indelDebug", doc = "Output indel debug info", required = false)
public boolean OUTPUT_DEBUG_INDEL_INFO = false;
@Hidden
@Argument(fullName = "dovit", shortName = "dovit", doc = "Perform full Viterbi calculation when evaluating the HMM", required = false)
public boolean dovit = false;
@Hidden
@Argument(fullName = "GSA_PRODUCTION_ONLY", shortName = "GSA_PRODUCTION_ONLY", doc = "don't ever use me", required = false)
public boolean GSA_PRODUCTION_ONLY = false;
@Hidden
@Argument(fullName = "exactCalculation", shortName = "exactCalculation", doc = "expt", required = false)
public ExactAFCalculationModel.ExactCalculation EXACT_CALCULATION_TYPE = ExactAFCalculationModel.ExactCalculation.LINEAR_EXPERIMENTAL;
@Hidden
@Argument(fullName = "ignoreSNPAlleles", shortName = "ignoreSNPAlleles", doc = "expt", required = false)
public boolean IGNORE_SNP_ALLELES = false;
@ -191,7 +177,6 @@ public class UnifiedArgumentCollection {
uac.GLmodel = GLmodel;
uac.AFmodel = AFmodel;
uac.EXACT_CALCULATION_TYPE = EXACT_CALCULATION_TYPE;
uac.heterozygosity = heterozygosity;
uac.PCR_error = PCR_error;
uac.GenotypingMode = GenotypingMode;
@ -209,15 +194,10 @@ public class UnifiedArgumentCollection {
uac.INDEL_GAP_CONTINUATION_PENALTY = INDEL_GAP_CONTINUATION_PENALTY;
uac.OUTPUT_DEBUG_INDEL_INFO = OUTPUT_DEBUG_INDEL_INFO;
uac.INDEL_HAPLOTYPE_SIZE = INDEL_HAPLOTYPE_SIZE;
uac.DO_CONTEXT_DEPENDENT_PENALTIES = DO_CONTEXT_DEPENDENT_PENALTIES;
uac.alleles = alleles;
uac.GET_GAP_PENALTIES_FROM_DATA = GET_GAP_PENALTIES_FROM_DATA;
uac.INDEL_RECAL_FILE = INDEL_RECAL_FILE;
// todo- arguments to remove
uac.COVERAGE_AT_WHICH_TO_ABORT = COVERAGE_AT_WHICH_TO_ABORT;
uac.dovit = dovit;
uac.GSA_PRODUCTION_ONLY = GSA_PRODUCTION_ONLY;
uac.IGNORE_SNP_ALLELES = IGNORE_SNP_ALLELES;
return uac;

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -127,7 +126,8 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
@ArgumentCollection
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
@ -210,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
// initialize the header

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
@ -36,13 +37,11 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.GenotypeLikelihoods;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.PrintStream;
import java.util.*;
@ -236,10 +235,11 @@ public class UnifiedGenotyperEngine {
private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
VariantContext vc;
if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, ref, false, logger, UAC.alleles);
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, ref, rawContext.getLocation(), false, logger, UAC.alleles);
if ( vcInput == null )
return null;
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles());
vc = new VariantContext("UG_call", vcInput.getChr(), vcInput.getStart(), vcInput.getEnd(), vcInput.getAlleles(), InferredGeneticContext.NO_NEG_LOG_10PERROR, null, null, ref.getBase());
} else {
// deal with bad/non-standard reference bases
if ( !Allele.acceptableAlleleBases(new byte[]{ref.getBase()}) )
@ -544,6 +544,21 @@ public class UnifiedGenotyperEngine {
AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
}
// Cache of MathUtils.binomialProbability(0, depth, 0.5) for depths 0..9999,
// avoiding repeated recomputation in the per-sample reference-confidence loop.
private final static double[] binomialProbabilityDepthCache = new double[10000];
static {
// depth 0: P(0 successes in 0 trials) is identically 1.0.  Previously this
// entry was left at its default 0.0, which was inconsistent with the
// uncached fallback path in getRefBinomialProb().
binomialProbabilityDepthCache[0] = 1.0;
for ( int i = 1; i < binomialProbabilityDepthCache.length; i++ ) {
binomialProbabilityDepthCache[i] = MathUtils.binomialProbability(0, i, 0.5);
}
}
/**
 * Probability of observing zero non-reference events in {@code depth} trials with p = 0.5.
 * Served from the precomputed cache when the depth falls inside it, otherwise
 * computed on the fly via MathUtils.
 */
private final double getRefBinomialProb(final int depth) {
return depth < binomialProbabilityDepthCache.length
? binomialProbabilityDepthCache[depth]
: MathUtils.binomialProbability(0, depth, 0.5);
}
private VariantCallContext estimateReferenceConfidence(VariantContext vc, Map<String, AlignmentContext> contexts, double theta, boolean ignoreCoveredSamples, double initialPofRef) {
if ( contexts == null )
return null;
@ -567,7 +582,7 @@ public class UnifiedGenotyperEngine {
depth = context.getExtendedEventPileup().size();
}
P_of_ref *= 1.0 - (theta / 2.0) * MathUtils.binomialProbability(0, depth, 0.5);
P_of_ref *= 1.0 - (theta / 2.0) * getRefBinomialProb(depth);
}
return new VariantCallContext(vc, QualityUtils.phredScaleErrorRate(1.0 - P_of_ref) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING, false);
@ -635,7 +650,7 @@ public class UnifiedGenotyperEngine {
// no extended event pileup
// if we're genotyping given alleles and we have a requested SNP at this position, do SNP
if (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES) {
VariantContext vcInput = SNPGenotypeLikelihoodsCalculationModel.getSNPVCFromAllelesRod(tracker, refContext, false, logger, UAC.alleles);
VariantContext vcInput = UnifiedGenotyperEngine.getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
if (vcInput == null)
return null;
@ -741,4 +756,23 @@ public class UnifiedGenotyperEngine {
return afcm;
}
/**
 * Searches the alleles ROD binding at the given location for a usable record —
 * unfiltered, and a SNP if {@code requireSNP} is set — and returns the first one
 * found.  Any additional matching records at the site are reported through the
 * logger and ignored.  Returns null when no usable record exists.
 */
public static VariantContext getVCFromAllelesRod(RefMetaDataTracker tracker, ReferenceContext ref, GenomeLoc loc, boolean requireSNP, Logger logger, final RodBinding<VariantContext> allelesBinding) {
if ( tracker == null || ref == null || logger == null )
throw new ReviewedStingException("Bad arguments: tracker=" + tracker + " ref=" + ref + " logger=" + logger);

VariantContext result = null;
for ( final VariantContext candidate : tracker.getValues(allelesBinding, loc) ) {
// skip unusable records: missing, filtered, or non-SNP when a SNP is required
if ( candidate == null || candidate.isFiltered() || (requireSNP && !candidate.isSNP()) )
continue;
if ( result != null ) {
logger.warn("Multiple valid VCF records detected in the alleles input file at site " + ref.getLocus() + ", only considering the first record");
} else {
result = candidate;
}
}
return result;
}
}

View File

@ -26,9 +26,9 @@
package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
@ -73,7 +73,7 @@ public class HaplotypeIndelErrorModel {
baseMatchArray = new double[MAX_CACHED_QUAL+1];
baseMismatchArray = new double[MAX_CACHED_QUAL+1];
for (int k=1; k <= MAX_CACHED_QUAL; k++) {
double baseProb = QualityUtils.qualToProb(k);
double baseProb = QualityUtils.qualToProb((byte)k);
baseMatchArray[k] = probToQual(baseProb);

View File

@ -28,9 +28,10 @@ package org.broadinstitute.sting.gatk.walkers.indels;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.genotype.Haplotype;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -50,36 +51,8 @@ import org.broadinstitute.sting.oneoffprojects.walkers.IndelCountCovariates.Reca
public class PairHMMIndelErrorModel {
public static final int BASE_QUAL_THRESHOLD = 20;
private static final int MATCH_OFFSET = 0;
private static final int X_OFFSET = 1;
private static final int Y_OFFSET = 2;
private static final int DIAG = 0;
private static final int UP = 1;
private static final int LEFT = 2;
private static final int DIAG_GOTO_M = 0;
private static final int DIAG_GOTO_X = 1;
private static final int DIAG_GOTO_Y = 2;
private static final int UP_GOTO_M = 4;
private static final int UP_GOTO_X = 5;
private static final int UP_GOTO_Y = 6;
private static final int LEFT_GOTO_M = 8;
private static final int LEFT_GOTO_X = 9;
private static final int LEFT_GOTO_Y = 10;
private static final int[] ACTIONS_M = {DIAG_GOTO_M, DIAG_GOTO_X, DIAG_GOTO_Y};
private static final int[] ACTIONS_X = {UP_GOTO_M, UP_GOTO_X, UP_GOTO_Y};
private static final int[] ACTIONS_Y = {LEFT_GOTO_M, LEFT_GOTO_X, LEFT_GOTO_Y};
private final double logGapOpenProbability;
private final double logGapContinuationProbability;
@ -100,36 +73,13 @@ public class PairHMMIndelErrorModel {
private static final double MIN_GAP_CONT_PENALTY = 10.0;
private static final double GAP_PENALTY_HRUN_STEP = 1.0; // each increase in hrun decreases gap penalty by this.
private boolean doViterbi = false;
private final boolean useAffineGapModel = true;
private boolean doContextDependentPenalties = false;
private final double[] GAP_OPEN_PROB_TABLE;
private final double[] GAP_CONT_PROB_TABLE;
private boolean getGapPenaltiesFromFile = false;
private int SMOOTHING = 1;
private int MAX_QUALITY_SCORE = 50;
private int PRESERVE_QSCORES_LESS_THAN = 5;
/////////////////////////////
// Private Member Variables
/////////////////////////////
//copy+
/* private RecalDataManager dataManager; // Holds the data HashMap, mostly used by TableRecalibrationWalker to create collapsed data hashmaps
private final ArrayList<Covariate> requestedCovariates = new ArrayList<Covariate>(); // List of covariates to be used in this calculation
private static final Pattern COMMENT_PATTERN = Pattern.compile("^#.*");
private static final Pattern OLD_RECALIBRATOR_HEADER = Pattern.compile("^rg,.*");
private static final Pattern COVARIATE_PATTERN = Pattern.compile("^ReadGroup,QualityScore,.*");
protected static final String EOF_MARKER = "EOF";
private long numReadsWithMalformedColorSpace = 0;
private RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection();
private NestedHashMap qualityScoreByFullCovariateKey = new NestedHashMap(); // Caches the result of performSequentialQualityCalculation(..) for all sets of covariate values.
*/
//copy-
static {
LOG_ONE_HALF= -Math.log10(2.0);
END_GAP_COST = LOG_ONE_HALF;
@ -145,141 +95,9 @@ public class PairHMMIndelErrorModel {
}
}
/**
 * Constructor variant that can additionally source gap penalties from a
 * recalibration file.  NOTE: the file-parsing logic below is currently
 * disabled (commented out, "gdebug" copy from TableRecalibrationWalker);
 * only the getGapPenaltiesFromFile flag assignment is live.
 *
 * @param indelGOP   indel gap-open penalty, in Phred-like (QUAL) units
 * @param indelGCP   indel gap-continuation penalty, in Phred-like (QUAL) units
 * @param deb        enable debug output
 * @param doCDP      enable context-dependent gap penalties
 * @param dovit      enable the Viterbi (best-path) computation
 * @param gpf        if true, gap penalties are to be read from RECAL_FILE
 * @param RECAL_FILE recalibration table file — unused while the parsing code is commented out
 */
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit,boolean gpf, File RECAL_FILE) {
this(indelGOP, indelGCP, deb, doCDP, dovit);
this.getGapPenaltiesFromFile = gpf;
// read data from recal file
// gdebug - start copy from TableRecalibrationWalker
/* if (gpf) {
boolean sawEOF = false;
boolean REQUIRE_EOF = false;
int lineNumber = 0;
boolean foundAllCovariates = false;
// Get a list of all available covariates
final List<Class<? extends Covariate>> classes = new PluginManager<Covariate>(Covariate.class).getPlugins();
try {
for ( String line : new XReadLines(RECAL_FILE) ) {
lineNumber++;
if ( EOF_MARKER.equals(line) ) {
sawEOF = true;
} else if( COMMENT_PATTERN.matcher(line).matches() || OLD_RECALIBRATOR_HEADER.matcher(line).matches() ) {
; // Skip over the comment lines, (which start with '#')
}
// Read in the covariates that were used from the input file
else if( COVARIATE_PATTERN.matcher(line).matches() ) { // The line string is either specifying a covariate or is giving csv data
if( foundAllCovariates ) {
throw new UserException.MalformedFile( RECAL_FILE, "Malformed input recalibration file. Found covariate names intermingled with data in file: " + RECAL_FILE );
} else { // Found the covariate list in input file, loop through all of them and instantiate them
String[] vals = line.split(",");
for( int iii = 0; iii < vals.length - 3; iii++ ) { // There are n-3 covariates. The last three items are nObservations, nMismatch, and Qempirical
boolean foundClass = false;
for( Class<?> covClass : classes ) {
if( (vals[iii] + "Covariate").equalsIgnoreCase( covClass.getSimpleName() ) ) {
foundClass = true;
try {
Covariate covariate = (Covariate)covClass.newInstance();
requestedCovariates.add( covariate );
} catch (Exception e) {
throw new DynamicClassResolutionException(covClass, e);
}
}
}
if( !foundClass ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration file. The requested covariate type (" + (vals[iii] + "Covariate") + ") isn't a valid covariate option." );
}
}
}
} else { // Found a line of data
if( !foundAllCovariates ) {
foundAllCovariates = true;
// At this point all the covariates should have been found and initialized
if( requestedCovariates.size() < 2 ) {
throw new UserException.MalformedFile(RECAL_FILE, "Malformed input recalibration csv file. Covariate names can't be found in file: " + RECAL_FILE );
}
final boolean createCollapsedTables = true;
// Initialize any covariate member variables using the shared argument collection
for( Covariate cov : requestedCovariates ) {
cov.initialize( RAC );
}
// Initialize the data hashMaps
dataManager = new RecalDataManager( createCollapsedTables, requestedCovariates.size() );
}
addCSVData(RECAL_FILE, line); // Parse the line and add the data to the HashMap
}
}
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(RECAL_FILE, "Can not find input file", e);
} catch ( NumberFormatException e ) {
throw new UserException.MalformedFile(RECAL_FILE, "Error parsing recalibration data at line " + lineNumber + ". Perhaps your table was generated by an older version of CovariateCounterWalker.");
}
if ( !sawEOF ) {
final String errorMessage = "No EOF marker was present in the recal covariates table; this could mean that the file is corrupted or was generated with an old version of the CountCovariates tool.";
if ( REQUIRE_EOF )
throw new UserException.MalformedFile(RECAL_FILE, errorMessage);
}
if( dataManager == null ) {
throw new UserException.MalformedFile(RECAL_FILE, "Can't initialize the data manager. Perhaps the recal csv file contains no data?");
}
// Create the tables of empirical quality scores that will be used in the sequential calculation
dataManager.generateEmpiricalQualities( SMOOTHING, MAX_QUALITY_SCORE );
}
// debug end copy
*/
}
/**
* For each covariate read in a value and parse it. Associate those values with the data itself (num observation and num mismatches)
*/
/*
private void addCSVData(final File file, final String line) {
final String[] vals = line.split(",");
// Check if the data line is malformed, for example if the read group string contains a comma then it won't be parsed correctly
if( vals.length != requestedCovariates.size() + 3 ) { // +3 because of nObservations, nMismatch, and Qempirical
throw new UserException.MalformedFile(file, "Malformed input recalibration file. Found data line with too many fields: " + line +
" --Perhaps the read group string contains a comma and isn't being parsed correctly.");
}
final Object[] key = new Object[requestedCovariates.size()];
Covariate cov;
int iii;
for( iii = 0; iii < requestedCovariates.size(); iii++ ) {
cov = requestedCovariates.get( iii );
key[iii] = cov.getValue( vals[iii] );
}
// Create a new datum using the number of observations, number of mismatches, and reported quality score
final RecalDatum datum = new RecalDatum( Long.parseLong( vals[iii] ), Long.parseLong( vals[iii + 1] ), Double.parseDouble( vals[1] ), 0.0 );
// Add that datum to all the collapsed tables which will be used in the sequential calculation
dataManager.addToAllTables( key, datum, PRESERVE_QSCORES_LESS_THAN );
}
*/
/**
 * Convenience constructor that additionally enables/disables the Viterbi
 * (best-path) computation; all other setup is delegated to the four-argument
 * constructor.
 *
 * @param dovit if true, use the Viterbi best-path calculation (doViterbi)
 */
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP, boolean dovit) {
this(indelGOP, indelGCP, deb, doCDP);
this.doViterbi = dovit;
}
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb, boolean doCDP) {
public PairHMMIndelErrorModel(double indelGOP, double indelGCP, boolean deb) {
this.logGapOpenProbability = -indelGOP/10.0; // QUAL to log prob
this.logGapContinuationProbability = -indelGCP/10.0; // QUAL to log prob
this.doContextDependentPenalties = doCDP;
this.DEBUG = deb;
@ -313,132 +131,6 @@ public class PairHMMIndelErrorModel {
}
/**
 * Scores a read against a candidate haplotype with a single-gap-state
 * (non-affine) Needleman-Wunsch-style dynamic program and returns the best
 * metric as the read likelihood.
 *
 * Rows (indI) index read bases and columns (indJ) index haplotype bases;
 * both matrices carry one extra leading row/column for the empty prefix.
 * Edge cells are initialized to 0, i.e. leading end gaps are free.  When
 * doViterbi is set, the maximal predecessor is taken and recorded for
 * traceback; otherwise cell scores are combined with MathUtils.softMax.
 *
 * @param haplotypeBases bases of the candidate haplotype
 * @param readBases      bases of the read
 * @param readQuals      per-base read qualities, clamped to [1, MAX_CACHED_QUAL]
 * @return the best path metric (log-scale, per the -QUAL/10 convention used
 *         for logGapOpenProbability)
 */
private double computeReadLikelihoodGivenHaplotype(byte[] haplotypeBases, byte[] readBases, byte[] readQuals) {
final int X_METRIC_LENGTH = readBases.length+1;
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
// initialize path metric and traceback memories for likelihood computation
double[][] pathMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
int[][] bestMetricArray = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
pathMetricArray[0][0]= 0;//Double.NEGATIVE_INFINITY;
// first column: read prefix aligned against an empty haplotype prefix — free end gaps
for (int i=1; i < X_METRIC_LENGTH; i++) {
pathMetricArray[i][0] = 0;
bestMetricArray[i][0] = UP;
}
// first row: haplotype prefix aligned against an empty read prefix — free end gaps
for (int j=1; j < Y_METRIC_LENGTH; j++) {
pathMetricArray[0][j] = 0;//logGapOpenProbability + (j-1) * logGapContinuationProbability;
bestMetricArray[0][j] = LEFT;
}
// main DP recursion: fill each cell from its DIAG / UP / LEFT predecessors
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
byte x = readBases[indI-1];
byte y = haplotypeBases[indJ-1];
byte qual = readQuals[indI-1];
double bestMetric = 0.0;
int bestMetricIdx = 0;
// compute metric for match/mismatch
// workaround for reads whose bases quality = 0,
if (qual < 1)
qual = 1;
if (qual > MAX_CACHED_QUAL)
qual = MAX_CACHED_QUAL;
double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
double[] metrics = new double[3];
metrics[DIAG] = pathMetricArray[indI-1][indJ-1] + pBaseRead;
metrics[UP] = pathMetricArray[indI-1][indJ] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
metrics[LEFT] = pathMetricArray[indI][indJ-1] + logGapOpenProbability;//(end?0.0:logGapOpenProbability);
if (doViterbi) {
// Viterbi: take the single best predecessor and remember it for traceback
bestMetricIdx = MathUtils.maxElementIndex(metrics);
bestMetric = metrics[bestMetricIdx];
}
else
// full-likelihood mode: soft-max over all three predecessors
bestMetric = MathUtils.softMax(metrics);
pathMetricArray[indI][indJ] = bestMetric;
bestMetricArray[indI][indJ] = bestMetricIdx;
}
}
// free trailing end gaps: the best alignment may terminate anywhere in the
// last row or the last column, so scan both for the maximum
double bestMetric=0.0;
int bestMetricIdx=0,bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
for (int i=0; i < X_METRIC_LENGTH; i ++ ) {
int j= Y_METRIC_LENGTH-1;
if (pathMetricArray[i][j] > bestMetric) {
bestMetric = pathMetricArray[i][j];
bestI = i;
bestJ = j;
}
}
for (int j=0; j < Y_METRIC_LENGTH; j++ ) {
int i= X_METRIC_LENGTH-1;
if (pathMetricArray[i][j] >= bestMetric) {
bestMetric = pathMetricArray[i][j];
bestI = i;
bestJ = j;
}
}
// debug-only traceback: reconstruct and print the alignment (requires the
// Viterbi pointers recorded above, hence the doViterbi guard)
if (DEBUG && doViterbi) {
String haplotypeString = new String (haplotypeBases);
String readString = new String(readBases);
int i = bestI;
int j = bestJ;
System.out.println("Simple NW");
while (i >0 || j >0) {
bestMetricIdx = bestMetricArray[i][j];
System.out.print(bestMetricIdx);
if (bestMetricIdx == UP) {
// insert gap in Y
haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
i--;
} else if (bestMetricIdx == LEFT) {
readString = readString.substring(0,i)+"-"+readString.substring(i);
j--;
}
else {
i--; j--;
}
}
System.out.println("\nAlignment: ");
System.out.println("R:"+readString);
System.out.println("H:"+haplotypeString);
System.out.println();
}
if (DEBUG)
System.out.format("Likelihood: %5.4f\n", bestMetric);
return bestMetric;
}
static private void getContextHomopolymerLength(final byte[] refBytes, int[] hrunArray) {
// compute forward hrun length, example:
// AGGTGACCCCCCTGAGAG
@ -479,14 +171,10 @@ public class PairHMMIndelErrorModel {
final int Y_METRIC_LENGTH = haplotypeBases.length+1;
// initialize path metric and traceback memories for likelihood computation
double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
int[][] bestActionArrayM = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
int[][] bestActionArrayX = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
int[][] bestActionArrayY = new int[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] matchMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] XMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
final double[][] YMetricArray = new double[X_METRIC_LENGTH][Y_METRIC_LENGTH];
double c,d;
matchMetricArray[0][0]= END_GAP_COST;//Double.NEGATIVE_INFINITY;
for (int i=1; i < X_METRIC_LENGTH; i++) {
@ -494,8 +182,6 @@ public class PairHMMIndelErrorModel {
matchMetricArray[i][0] = Double.NEGATIVE_INFINITY;
YMetricArray[i][0] = Double.NEGATIVE_INFINITY;
XMetricArray[i][0] = END_GAP_COST*(i);//logGapOpenProbability + (i-1)*logGapContinuationProbability;
bestActionArrayX[i][0] = bestActionArrayY[i][0] = bestActionArrayM[i][0] = UP_GOTO_X;
}
for (int j=1; j < Y_METRIC_LENGTH; j++) {
@ -503,188 +189,46 @@ public class PairHMMIndelErrorModel {
matchMetricArray[0][j] = Double.NEGATIVE_INFINITY;
XMetricArray[0][j] = Double.NEGATIVE_INFINITY;
YMetricArray[0][j] = END_GAP_COST*(j);//logGapOpenProbability + (j-1) * logGapContinuationProbability;
bestActionArrayY[0][j] = bestActionArrayM[0][j] = bestActionArrayX[0][j] = LEFT_GOTO_Y;
}
for (int indI=1; indI < X_METRIC_LENGTH; indI++) {
int im1 = indI-1;
final int im1 = indI-1;
for (int indJ=1; indJ < Y_METRIC_LENGTH; indJ++) {
int jm1 = indJ-1;
byte x = readBases[im1];
byte y = haplotypeBases[jm1];
byte qual = readQuals[im1];
double bestMetric = 0.0;
int bestMetricIdx = 0;
// compute metric for match/mismatch
// workaround for reads whose bases quality = 0,
if (qual < 1)
qual = 1;
if (qual > MAX_CACHED_QUAL)
qual = MAX_CACHED_QUAL;
double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
double[] metrics = new double[3];
if (doViterbi) {
// update match array
metrics[MATCH_OFFSET] = matchMetricArray[im1][jm1] + pBaseRead;
metrics[X_OFFSET] = XMetricArray[im1][jm1] + pBaseRead;
metrics[Y_OFFSET] = YMetricArray[im1][jm1] + pBaseRead;
bestMetricIdx = MathUtils.maxElementIndex(metrics);
bestMetric = metrics[bestMetricIdx];
}
else
bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead, XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
final int jm1 = indJ-1;
final byte x = readBases[im1];
final byte y = haplotypeBases[jm1];
final byte qual = readQuals[im1] < 1 ? 1 : (readQuals[im1] > MAX_CACHED_QUAL ? MAX_CACHED_QUAL : readQuals[im1]);
final double pBaseRead = (x == y)? baseMatchArray[(int)qual]:baseMismatchArray[(int)qual];
double bestMetric = MathUtils.softMax(matchMetricArray[im1][jm1] + pBaseRead,
XMetricArray[im1][jm1] + pBaseRead,
YMetricArray[im1][jm1] + pBaseRead);
matchMetricArray[indI][indJ] = bestMetric;
bestActionArrayM[indI][indJ] = ACTIONS_M[bestMetricIdx];
// update X array
// State X(i,j): X(1:i) aligned to a gap in Y(1:j).
// When in last column of X, ie X(1:i) aligned to full Y, we don't want to penalize gaps
//c = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
//d = (indJ==Y_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
if (getGapPenaltiesFromFile) {
c = currentGOP[im1];
d = logGapContinuationProbability;
} else {
c = currentGOP[jm1];
d = currentGCP[jm1];
}
if (indJ == Y_METRIC_LENGTH-1)
c = d = END_GAP_COST;
if (doViterbi) {
metrics[MATCH_OFFSET] = matchMetricArray[im1][indJ] + c;
metrics[X_OFFSET] = XMetricArray[im1][indJ] + d;
metrics[Y_OFFSET] = Double.NEGATIVE_INFINITY; //YMetricArray[indI-1][indJ] + logGapOpenProbability;
bestMetricIdx = MathUtils.maxElementIndex(metrics);
bestMetric = metrics[bestMetricIdx];
}
else
bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c, XMetricArray[im1][indJ] + d);
final double c1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d1 = indJ == Y_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
bestMetric = MathUtils.softMax(matchMetricArray[im1][indJ] + c1, XMetricArray[im1][indJ] + d1);
XMetricArray[indI][indJ] = bestMetric;
bestActionArrayX[indI][indJ] = ACTIONS_X[bestMetricIdx];
// update Y array
//c = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGOP[jm1]);
//d = (indI==X_METRIC_LENGTH-1? END_GAP_COST: currentGCP[jm1]);
if (getGapPenaltiesFromFile) {
c = currentGOP[im1];
d = logGapContinuationProbability;
}
else {
c = currentGOP[jm1];
d = currentGCP[jm1];
}
if (indI == X_METRIC_LENGTH-1)
c = d = END_GAP_COST;
if (doViterbi) {
metrics[MATCH_OFFSET] = matchMetricArray[indI][jm1] + c;
metrics[X_OFFSET] = Double.NEGATIVE_INFINITY; //XMetricArray[indI][indJ-1] + logGapOpenProbability;
metrics[Y_OFFSET] = YMetricArray[indI][jm1] + d;
bestMetricIdx = MathUtils.maxElementIndex(metrics);
bestMetric = metrics[bestMetricIdx];
}
else
bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c, YMetricArray[indI][jm1] + d);
final double c2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGOP[jm1];
final double d2 = indI == X_METRIC_LENGTH-1 ? END_GAP_COST : currentGCP[jm1];
bestMetric = MathUtils.softMax(matchMetricArray[indI][jm1] + c2, YMetricArray[indI][jm1] + d2);
YMetricArray[indI][indJ] = bestMetric;
bestActionArrayY[indI][indJ] = ACTIONS_Y[bestMetricIdx];
}
}
double bestMetric;
double metrics[] = new double[3];
int bestTable=0, bestI=X_METRIC_LENGTH - 1, bestJ=Y_METRIC_LENGTH - 1;
metrics[MATCH_OFFSET] = matchMetricArray[bestI][bestJ];
metrics[X_OFFSET] = XMetricArray[bestI][bestJ];
metrics[Y_OFFSET] = YMetricArray[bestI][bestJ];
if (doViterbi) {
bestTable = MathUtils.maxElementIndex(metrics);
bestMetric = metrics[bestTable];
}
else
bestMetric = MathUtils.softMax(metrics);
final int bestI = X_METRIC_LENGTH - 1, bestJ = Y_METRIC_LENGTH - 1;
final double bestMetric = MathUtils.softMax(matchMetricArray[bestI][bestJ],
XMetricArray[bestI][bestJ],
YMetricArray[bestI][bestJ]);
// Do traceback (needed only for debugging!)
if (DEBUG && doViterbi) {
int bestAction;
int i = bestI;
int j = bestJ;
System.out.println("Affine gap NW");
String haplotypeString = new String (haplotypeBases);
String readString = new String(readBases);
while (i >0 || j >0) {
if (bestTable == X_OFFSET) {
// insert gap in Y
haplotypeString = haplotypeString.substring(0,j)+"-"+haplotypeString.substring(j);
bestAction = bestActionArrayX[i][j];
}
else if (bestTable == Y_OFFSET) {
readString = readString.substring(0,i)+"-"+readString.substring(i);
bestAction = bestActionArrayY[i][j];
}
else {
bestAction = bestActionArrayM[i][j];
}
System.out.print(bestAction);
// bestAction contains action to take at next step
// encoding of bestAction: upper 2 bits = direction, lower 2 bits = next table
// bestTable and nextDirection for next step
bestTable = bestAction & 0x3;
int nextDirection = bestAction >> 2;
if (nextDirection == UP) {
i--;
} else if (nextDirection == LEFT) {
j--;
} else { // if (nextDirection == DIAG)
i--; j--;
}
}
System.out.println("\nAlignment: ");
System.out.println("R:"+readString);
System.out.println("H:"+haplotypeString);
System.out.println();
}
if (DEBUG)
System.out.format("Likelihood: %5.4f\n", bestMetric);
@ -707,12 +251,12 @@ public class PairHMMIndelErrorModel {
}
}
public synchronized double[] computeReadHaplotypeLikelihoods(ReadBackedPileup pileup, LinkedHashMap<Allele,Haplotype> haplotypeMap,
ReferenceContext ref, int eventLength,
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
ReferenceContext ref, int eventLength,
HashMap<PileupElement, LinkedHashMap<Allele,Double>> indelLikelihoodMap){
int numHaplotypes = haplotypeMap.size();
double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
double readLikelihoods[][] = new double[pileup.getReads().size()][numHaplotypes];
final double readLikelihoods[][] = new double[pileup.size()][numHaplotypes];
final int readCounts[] = new int[pileup.size()];
int readIdx=0;
LinkedHashMap<Allele,double[]> gapOpenProbabilityMap = new LinkedHashMap<Allele,double[]>();
@ -723,34 +267,35 @@ public class PairHMMIndelErrorModel {
System.out.println(new String(ref.getBases()));
}
if (doContextDependentPenalties && !getGapPenaltiesFromFile) {
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
for (Allele a: haplotypeMap.keySet()) {
Haplotype haplotype = haplotypeMap.get(a);
byte[] haplotypeBases = haplotype.getBasesAsBytes();
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
// get homopolymer length profile for current haplotype
int[] hrunProfile = new int[haplotypeBases.length];
getContextHomopolymerLength(haplotypeBases,hrunProfile);
if (DEBUG) {
System.out.println("Haplotype bases:");
System.out.println(new String(haplotypeBases));
for (int i=0; i < hrunProfile.length; i++)
System.out.format("%d",hrunProfile[i]);
System.out.println();
}
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
// will context dependent probabilities based on homopolymer run. Probabilities are filled based on total complete haplotypes.
// todo -- refactor into separate function
for (Allele a: haplotypeMap.keySet()) {
Haplotype haplotype = haplotypeMap.get(a);
byte[] haplotypeBases = haplotype.getBasesAsBytes();
double[] contextLogGapOpenProbabilities = new double[haplotypeBases.length];
double[] contextLogGapContinuationProbabilities = new double[haplotypeBases.length];
// get homopolymer length profile for current haplotype
int[] hrunProfile = new int[haplotypeBases.length];
getContextHomopolymerLength(haplotypeBases,hrunProfile);
if (DEBUG) {
System.out.println("Haplotype bases:");
System.out.println(new String(haplotypeBases));
for (int i=0; i < hrunProfile.length; i++)
System.out.format("%d",hrunProfile[i]);
System.out.println();
}
fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
gapOpenProbabilityMap.put(a,contextLogGapOpenProbabilities);
gapContProbabilityMap.put(a,contextLogGapContinuationProbabilities);
}
for (PileupElement p: pileup) {
// > 1 when the read is a consensus read representing multiple independent observations
final boolean isReduced = ReadUtils.isReducedRead(p.getRead());
readCounts[readIdx] = isReduced ? p.getReducedCount() : 1;
// check if we've already computed likelihoods for this pileup element (i.e. for this read at this location)
if (indelLikelihoodMap.containsKey(p)) {
@ -762,61 +307,20 @@ public class PairHMMIndelErrorModel {
}
else {
//System.out.format("%d %s\n",p.getRead().getAlignmentStart(), p.getRead().getClass().getName());
GATKSAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
SAMRecord read = ReadUtils.hardClipAdaptorSequence(p.getRead());
if (read == null)
continue;
if(ReadUtils.is454Read(read) && !getGapPenaltiesFromFile) {
if ( isReduced ) {
read = ReadUtils.reducedReadWithReducedQuals(read);
}
if(ReadUtils.is454Read(read)) {
continue;
}
double[] recalQuals = null;
/*
if (getGapPenaltiesFromFile) {
RecalDataManager.parseSAMRecord( read, RAC );
recalQuals = new double[read.getReadLength()];
//compute all covariate values for this read
final Comparable[][] covariateValues_offset_x_covar =
RecalDataManager.computeCovariates((GATKSAMRecord) read, requestedCovariates);
// For each base in the read
for( int offset = 0; offset < read.getReadLength(); offset++ ) {
final Object[] fullCovariateKey = covariateValues_offset_x_covar[offset];
Byte qualityScore = (Byte) qualityScoreByFullCovariateKey.get(fullCovariateKey);
if(qualityScore == null)
{
qualityScore = performSequentialQualityCalculation( fullCovariateKey );
qualityScoreByFullCovariateKey.put(qualityScore, fullCovariateKey);
}
recalQuals[offset] = -((double)qualityScore)/10.0;
}
// for each read/haplotype combination, compute likelihoods, ie -10*log10(Pr(R | Hi))
// = sum_j(-10*log10(Pr(R_j | Hi) since reads are assumed to be independent
if (DEBUG) {
System.out.format("\n\nStarting read:%s S:%d US:%d E:%d UE:%d C:%s\n",read.getReadName(),
read.getAlignmentStart(),
read.getUnclippedStart(), read.getAlignmentEnd(), read.getUnclippedEnd(),
read.getCigarString());
byte[] bases = read.getReadBases();
for (int k = 0; k < recalQuals.length; k++) {
System.out.format("%c",bases[k]);
}
System.out.println();
for (int k = 0; k < recalQuals.length; k++) {
System.out.format("%.0f ",recalQuals[k]);
}
System.out.println();
}
} */
// get bases of candidate haplotypes that overlap with reads
final int trailingBases = 3;
@ -937,11 +441,6 @@ public class PairHMMIndelErrorModel {
unclippedReadBases.length-numEndClippedBases);
double[] recalCDP = null;
if (getGapPenaltiesFromFile) {
recalCDP = Arrays.copyOfRange(recalQuals,numStartClippedBases,
unclippedReadBases.length-numEndClippedBases);
}
if (DEBUG) {
System.out.println("Read bases:");
@ -971,27 +470,9 @@ public class PairHMMIndelErrorModel {
System.out.println(new String(haplotypeBases));
}
Double readLikelihood = 0.0;
if (useAffineGapModel) {
double[] currentContextGOP = null;
double[] currentContextGCP = null;
if (doContextDependentPenalties) {
if (getGapPenaltiesFromFile) {
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, recalCDP, null);
} else {
currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
}
}
}
else
readLikelihood = computeReadLikelihoodGivenHaplotype(haplotypeBases, readBases, readQuals);
final double[] currentContextGOP = Arrays.copyOfRange(gapOpenProbabilityMap.get(a), (int)indStart, (int)indStop);
final double[] currentContextGCP = Arrays.copyOfRange(gapContProbabilityMap.get(a), (int)indStart, (int)indStop);
final double readLikelihood = computeReadLikelihoodGivenHaplotypeAffineGaps(haplotypeBases, readBases, readQuals, currentContextGOP, currentContextGCP);
readEl.put(a,readLikelihood);
readLikelihoods[readIdx][j++] = readLikelihood;
@ -1004,7 +485,7 @@ public class PairHMMIndelErrorModel {
if (DEBUG) {
System.out.println("\nLikelihood summary");
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
for (readIdx=0; readIdx < pileup.size(); readIdx++) {
System.out.format("Read Index: %d ",readIdx);
for (int i=0; i < readLikelihoods[readIdx].length; i++)
System.out.format("L%d: %f ",i,readLikelihoods[readIdx][i]);
@ -1012,123 +493,41 @@ public class PairHMMIndelErrorModel {
}
}
return getHaplotypeLikelihoods(numHaplotypes, readCounts, readLikelihoods);
}
private final static double[] getHaplotypeLikelihoods(final int numHaplotypes, final int readCounts[], final double readLikelihoods[][]) {
final double[][] haplotypeLikehoodMatrix = new double[numHaplotypes][numHaplotypes];
// todo: MAD 09/26/11 -- I'm almost certain this calculation can be simplied to just a single loop without the intermediate NxN matrix
for (int i=0; i < numHaplotypes; i++) {
for (int j=i; j < numHaplotypes; j++){
// combine likelihoods of haplotypeLikelihoods[i], haplotypeLikelihoods[j]
// L(Hi, Hj) = sum_reads ( Pr(R|Hi)/2 + Pr(R|Hj)/2)
//readLikelihoods[k][j] has log10(Pr(R_k) | H[j] )
for (readIdx=0; readIdx < pileup.getReads().size(); readIdx++) {
for (int readIdx = 0; readIdx < readLikelihoods.length; readIdx++) {
// Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
// First term is approximated by Jacobian log with table lookup.
if (Double.isInfinite(readLikelihoods[readIdx][i]) && Double.isInfinite(readLikelihoods[readIdx][j]))
continue;
haplotypeLikehoodMatrix[i][j] += ( MathUtils.softMax(readLikelihoods[readIdx][i],
readLikelihoods[readIdx][j]) + LOG_ONE_HALF);
final double li = readLikelihoods[readIdx][i];
final double lj = readLikelihoods[readIdx][j];
final int readCount = readCounts[readIdx];
haplotypeLikehoodMatrix[i][j] += readCount * (MathUtils.softMax(li, lj) + LOG_ONE_HALF);
}
}
}
return getHaplotypeLikelihoods(haplotypeLikehoodMatrix);
}
public static double[] getHaplotypeLikelihoods(double[][] haplotypeLikehoodMatrix) {
int hSize = haplotypeLikehoodMatrix.length;
double[] genotypeLikelihoods = new double[hSize*(hSize+1)/2];
final double[] genotypeLikelihoods = new double[numHaplotypes*(numHaplotypes+1)/2];
int k=0;
double maxElement = Double.NEGATIVE_INFINITY;
for (int j=0; j < hSize; j++) {
for (int j=0; j < numHaplotypes; j++) {
for (int i=0; i <= j; i++){
genotypeLikelihoods[k++] = haplotypeLikehoodMatrix[i][j];
if (haplotypeLikehoodMatrix[i][j] > maxElement)
maxElement = haplotypeLikehoodMatrix[i][j];
}
}
// renormalize
for (int i=0; i < genotypeLikelihoods.length; i++)
genotypeLikelihoods[i] -= maxElement;
return genotypeLikelihoods;
// renormalize so that max element is zero.
return MathUtils.normalizeFromLog10(genotypeLikelihoods, false, true);
}
/**
* Implements a serial recalibration of the reads using the combinational table.
* First, we perform a positional recalibration, and then a subsequent dinuc correction.
*
* Given the full recalibration table, we perform the following preprocessing steps:
*
* - calculate the global quality score shift across all data [DeltaQ]
* - calculate for each of cycle and dinuc the shift of the quality scores relative to the global shift
* -- i.e., DeltaQ(dinuc) = Sum(pos) Sum(Qual) Qempirical(pos, qual, dinuc) - Qreported(pos, qual, dinuc) / Npos * Nqual
* - The final shift equation is:
*
* Qrecal = Qreported + DeltaQ + DeltaQ(pos) + DeltaQ(dinuc) + DeltaQ( ... any other covariate ... )
* @param key The list of Comparables that were calculated from the covariates
* @return A recalibrated quality score as a byte
*/
/*
private byte performSequentialQualityCalculation( final Object... key ) {
final byte qualFromRead = (byte)Integer.parseInt(key[1].toString());
final Object[] readGroupCollapsedKey = new Object[1];
final Object[] qualityScoreCollapsedKey = new Object[2];
final Object[] covariateCollapsedKey = new Object[3];
// The global quality shift (over the read group only)
readGroupCollapsedKey[0] = key[0];
final RecalDatum globalRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(0).get( readGroupCollapsedKey ));
double globalDeltaQ = 0.0;
if( globalRecalDatum != null ) {
final double globalDeltaQEmpirical = globalRecalDatum.getEmpiricalQuality();
final double aggregateQReported = globalRecalDatum.getEstimatedQReported();
globalDeltaQ = globalDeltaQEmpirical - aggregateQReported;
}
// The shift in quality between reported and empirical
qualityScoreCollapsedKey[0] = key[0];
qualityScoreCollapsedKey[1] = key[1];
final RecalDatum qReportedRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(1).get( qualityScoreCollapsedKey ));
double deltaQReported = 0.0;
if( qReportedRecalDatum != null ) {
final double deltaQReportedEmpirical = qReportedRecalDatum.getEmpiricalQuality();
deltaQReported = deltaQReportedEmpirical - qualFromRead - globalDeltaQ;
}
// The shift in quality due to each covariate by itself in turn
double deltaQCovariates = 0.0;
double deltaQCovariateEmpirical;
covariateCollapsedKey[0] = key[0];
covariateCollapsedKey[1] = key[1];
for( int iii = 2; iii < key.length; iii++ ) {
covariateCollapsedKey[2] = key[iii]; // The given covariate
final RecalDatum covariateRecalDatum = ((RecalDatum)dataManager.getCollapsedTable(iii).get( covariateCollapsedKey ));
if( covariateRecalDatum != null ) {
deltaQCovariateEmpirical = covariateRecalDatum.getEmpiricalQuality();
deltaQCovariates += ( deltaQCovariateEmpirical - qualFromRead - (globalDeltaQ + deltaQReported) );
}
}
final double newQuality = qualFromRead + globalDeltaQ + deltaQReported + deltaQCovariates;
return QualityUtils.boundQual( (int)Math.round(newQuality), (byte)MAX_QUALITY_SCORE );
// Verbose printouts used to validate with old recalibrator
//if(key.contains(null)) {
// System.out.println( key + String.format(" => %d + %.2f + %.2f + %.2f + %.2f = %d",
// qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte));
//}
//else {
// System.out.println( String.format("%s %s %s %s => %d + %.2f + %.2f + %.2f + %.2f = %d",
// key.get(0).toString(), key.get(3).toString(), key.get(2).toString(), key.get(1).toString(), qualFromRead, globalDeltaQ, deltaQReported, deltaQPos, deltaQDinuc, newQualityByte) );
//}
//return newQualityByte;
}
*/
}

View File

@ -68,26 +68,59 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.*;
import java.util.*;
/**
* Tool for calling indels in Tumor-Normal paired sample mode; this tool supports single-sample mode as well,
* but this latter functionality is now superceded by UnifiedGenotyper.
*
* <p>
* This is a simple, counts-and-cutoffs based tool for calling indels from aligned (preferrably MSA cleaned) sequencing
* data. Two output formats supported are: BED format (minimal output, required), and extended output that includes read
* and mismtach statistics around the calls (tuned on with --verbose). The calls can be performed from a single/pooled sample,
* or from a matched pair of samples (with --somatic option). In the latter case, two input bam files must be specified,
* the order is important: indels are called from the second sample ("Tumor") and additionally annotated as germline
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the first sample ("Normal"), or as somatic
* if first bam has coverage at the site but no indication for an indel. In the --somatic mode, BED output contains
* only somatic calls, while --verbose output contains all calls annotated with GERMLINE/SOMATIC keywords.
* data. Supported output formats are: BED format, extended verbose output (tab separated), and VCF. The latter two outputs
 * include additional statistics such as mismatches and base qualities around the calls, read strandedness (how many
* forward/reverse reads support ref and indel alleles) etc. It is highly recommended to use these additional
* statistics to perform post-filtering of the calls as the tool is tuned for sensitivity (in other words it will
* attempt to "call" anything remotely reasonable based only on read counts and will generate all the additional
* metrics for the post-processing tools to make the final decision). The calls are performed by default
* from a matched tumor-normal pair of samples. In this case, two (sets of) input bam files must be specified using tagged -I
* command line arguments: normal and tumor bam(s) must be passed with -I:normal and -I:tumor arguments,
* respectively. Indels are called from the tumor sample and annotated as germline
* if even a weak evidence for the same indel, not necessarily a confident call, exists in the normal sample, or as somatic
* if normal sample has coverage at the site but no indication for an indel. Note that strictly speaking the calling
* is not even attempted in normal sample: if there is an indel in normal that is not detected/does not pass a threshold
* in tumor sample, it will not be reported.
*
* <b>If any of the general usage of this tool or any of the command-line arguments for this tool are not clear to you,
* please email asivache at broadinstitute dot org and he will gladly explain everything in more detail.</b>
* To make indel calls and associated metrics for a single sample, this tool can be run with --unpaired flag (input
* bam tagging is not required in this case, and tags are completely ignored if still used: all input bams will be merged
* on the fly and assumed to represent a single sample - this tool does not check for sample id in the read groups).
*
* <h2>Input</h2>
* <p>
* Tumor and normal bam files (or single sample bam file(s) in --unpaired mode).
* </p>
*
* <h2>Output</h2>
* <p>
* Indel calls with associated metrics.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T SomaticIndelDetector \
* -o indels.vcf \
* -verbose indels.txt
* -I:normal normal.bam \
* -I:tumor tumor.bam
* </pre>
*
*/
@ReadFilters({Platform454Filter.class, MappingQualityZeroFilter.class, PlatformUnitFilter.class})
public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
// @Output
// PrintStream out;
@Output(doc="File to which variants should be written",required=true)
@Output(doc="File to write variants (indels) in VCF format",required=true)
protected VCFWriter vcf_writer = null;
@Argument(fullName="outputFile", shortName="O", doc="output file name (BED format). DEPRECATED> Use --bed", required=true)
@ -102,68 +135,80 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
@Hidden
@Argument(fullName = "genotype_intervals", shortName = "genotype",
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or it's the ref", required = false)
doc = "Calls will be made at each position within the specified interval(s), whether there is an indel or not", required = false)
public String genotypeIntervalsFile = null;
@Hidden
@Argument(fullName="genotypeIntervalsAreNotSorted", shortName="giNotSorted", required=false,
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
"if the list turns out to be unsorted, it will throw an exception. "+
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
"to sort and keep it in memory (increases memory usage!).")
doc="This tool assumes that the genotyping interval list (--genotype_intervals) is sorted; "+
"if the list turns out to be unsorted, it will throw an exception. "+
"Use this argument when your interval list is not sorted to instruct the IndelGenotyper "+
"to sort and keep it in memory (increases memory usage!).")
protected boolean GENOTYPE_NOT_SORTED = false;
@Hidden
@Argument(fullName="unpaired", shortName="unpaired",
doc="Perform unpaired calls (no somatic status detection)", required=false)
@Argument(fullName="unpaired", shortName="unpaired",
doc="Perform unpaired calls (no somatic status detection)", required=false)
boolean call_unpaired = false;
boolean call_somatic ;
boolean call_somatic ;
@Argument(fullName="verboseOutput", shortName="verbose",
doc="Verbose output file in text format", required=false)
java.io.File verboseOutput = null;
@Argument(fullName="verboseOutput", shortName="verbose",
doc="Verbose output file in text format", required=false)
java.io.File verboseOutput = null;
@Argument(fullName="bedOutput", shortName="bed",
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
doc="Lightweight bed output file (only positions and events, no stats/annotations)", required=false)
java.io.File bedOutput = null;
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with coverage of minCoverage or more reads; with --somatic this value is applied to tumor sample", required=false)
int minCoverage = 6;
@Argument(fullName="minCoverage", shortName="minCoverage",
doc="indel calls will be made only at sites with tumor coverage of minCoverage or more reads; "+
"with --unpaired (single sample) option, this value is used for minimum sample coverage", required=false)
int minCoverage = 6;
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only with --somatic; normal sample must have at least minNormalCoverage or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
int minNormalCoverage = 4;
@Argument(fullName="minNormalCoverage", shortName="minNormalCoverage",
doc="used only in default (somatic) mode; normal sample must have at least minNormalCoverage "+
"or more reads at the site to call germline/somatic indel, otherwise the indel (in tumor) is ignored", required=false)
int minNormalCoverage = 4;
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
double minFraction = 0.3;
@Argument(fullName="minFraction", shortName="minFraction",
doc="Minimum fraction of reads with CONSENSUS indel at a site, out of all reads covering the site, required for making a call"+
" (fraction of non-consensus indels at the site is not considered here, see minConsensusFraction)", required=false)
double minFraction = 0.3;
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt all indel observations at the site exceeds this threshold", required=false)
double minConsensusFraction = 0.7;
@Argument(fullName="minConsensusFraction", shortName="minConsensusFraction",
doc="Indel call is made only if fraction of CONSENSUS indel observations at a site wrt "+
"all indel observations at the site exceeds this threshold", required=false)
double minConsensusFraction = 0.7;
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
int minIndelCount = 0;
@Argument(fullName="minIndelCount", shortName="minCnt",
doc="Minimum count of reads supporting consensus indel required for making the call. "+
" This filter supercedes minFraction, i.e. indels with acceptable minFraction at low coverage "+
"(minIndelCount not met) will not pass.", required=false)
int minIndelCount = 0;
@Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(fullName="refseq", shortName="refseq",
doc="Name of RefSeq transcript annotation file. If specified, indels will be annotated with "+
"GENOMIC/UTR/INTRON/CODING and with the gene name", required=false)
String RefseqFileName = null;
@Argument(fullName="blacklistedLanes", shortName="BL",
doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
"by this application, so they will not contribute indels to consider and will not be counted.", required=false)
PlatformUnitFilterHelper dummy;
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",required=false) Boolean DEBUG = false;
//@Argument(fullName="blacklistedLanes", shortName="BL",
// doc="Name of lanes (platform units) that should be ignored. Reads coming from these lanes will never be seen "+
// "by this application, so they will not contribute indels to consider and will not be counted.", required=false)
//PlatformUnitFilterHelper dummy;
@Hidden
@Argument(fullName="indel_debug", shortName="idebug", doc="Detailed printout for debugging, do not turn this on",
required=false) Boolean DEBUG = false;
@Argument(fullName="window_size", shortName="ws", doc="Size (bp) of the sliding window used for accumulating the coverage. "+
"May need to be increased to accomodate longer reads or longer deletions.",required=false) int WINDOW_SIZE = 200;
"May need to be increased to accomodate longer reads or longer deletions. A read can be fit into the "+
"window if its length on the reference (i.e. read length + length of deletion gap(s) if any) is smaller "+
"than the window size. Reads that do not fit will be ignored, so long deletions can not be called "+
"if window is too small",required=false) int WINDOW_SIZE = 200;
@Argument(fullName="maxNumberOfReads",shortName="mnr",doc="Maximum number of reads to cache in the window; if number of reads exceeds this number,"+
" the window will be skipped and no calls will be made from it",required=false) int MAX_READ_NUMBER = 10000;
private WindowContext tumor_context;
private WindowContext normal_context;
private int currentContigIndex = -1;

View File

@ -37,7 +37,7 @@ public class PhasingRead extends BaseArray {
public PhasingRead(int length, int mappingQual) {
super(length);
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb(mappingQual));
this.mappingProb = new PreciseNonNegativeDouble(QualityUtils.qualToProb((byte)mappingQual));
this.baseProbs = new PreciseNonNegativeDouble[length];
Arrays.fill(this.baseProbs, null);

View File

@ -44,12 +44,12 @@ public class RefSeqDataParser {
String nameKeyToUseMultiplePrefix = nameKeyToUse + "_";
Map<String, String> entriesToNames = new HashMap<String, String>();
Integer numRecords = vc.getAttributeAsIntegerNoException(NUM_RECORDS_KEY);
if (numRecords != null) {
int numRecords = vc.getAttributeAsInt(NUM_RECORDS_KEY, -1);
if (numRecords != -1) {
boolean done = false;
if (numRecords == 1) { // Check if perhaps the single record doesn't end with "_1":
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
String name = vc.getAttributeAsString(nameKeyToUse, null);
if (name != null) {
entriesToNames.put(nameKeyToUse, name);
done = true;
@ -59,14 +59,14 @@ public class RefSeqDataParser {
if (!done) {
for (int i = 1; i <= numRecords; i++) {
String key = nameKeyToUseMultiplePrefix + i;
String name = vc.getAttributeAsStringNoException(key);
String name = vc.getAttributeAsString(key, null);
if (name != null)
entriesToNames.put(key, name);
}
}
}
else { // no entry with the # of records:
String name = vc.getAttributeAsStringNoException(nameKeyToUse);
String name = vc.getAttributeAsString(nameKeyToUse, null);
if (name != null) {
entriesToNames.put(nameKeyToUse, name);
}

View File

@ -42,6 +42,7 @@ import java.util.*;
*
* <p>Body test</p>
*/
@Hidden
public class DocumentationTest extends RodWalker<Integer, Integer> {
// the docs for the arguments are in the collection
@ArgumentCollection protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();

View File

@ -76,6 +76,42 @@ import java.util.Map;
* <h2>Output</h2>
* <p>
* A recalibration table file in CSV format that is used by the TableRecalibration walker.
* It is a comma-separated text file relating the desired covariates to the number of such bases and their rate of mismatch in the genome, and its implied empirical quality score.
*
 * The first 20 lines of such a file are shown below.
* * The file begins with a series of comment lines describing:
* ** The number of counted loci
* ** The number of counted bases
* ** The number of skipped loci and the fraction skipped, due to presence in dbSNP or bad reference bases
*
* * After the comments appears a header line indicating which covariates were used as well as the ordering of elements in the subsequent records.
*
* * After the header, data records occur one per line until the end of the file. The first several items on a line are the values of the individual covariates and will change
* depending on which covariates were specified at runtime. The last three items are the data- that is, number of observations for this combination of covariates, number of
* reference mismatches, and the raw empirical quality score calculated by phred-scaling the mismatch rate.
*
* <pre>
* # Counted Sites 19451059
* # Counted Bases 56582018
* # Skipped Sites 82666
* # Fraction Skipped 1 / 235 bp
* ReadGroup,QualityScore,Cycle,Dinuc,nObservations,nMismatches,Qempirical
* SRR006446,11,65,CA,9,1,10
* SRR006446,11,48,TA,10,0,40
* SRR006446,11,67,AA,27,0,40
* SRR006446,11,61,GA,11,1,10
* SRR006446,12,34,CA,47,1,17
* SRR006446,12,30,GA,52,1,17
* SRR006446,12,36,AA,352,1,25
* SRR006446,12,17,TA,182,11,12
* SRR006446,11,48,TG,2,0,40
* SRR006446,11,67,AG,1,0,40
* SRR006446,12,34,CG,9,0,40
* SRR006446,12,30,GG,43,0,40
* ERR001876,4,31,AG,1,0,40
* ERR001876,4,31,AT,2,2,1
* ERR001876,4,31,CA,1,0,40
* </pre>
* </p>
*
* <h2>Examples</h2>

View File

@ -61,7 +61,7 @@ import java.util.List;
* CACGTTCGGcttgtgcagagcctcaaggtcatccagaggtgatAGTTTAGGGCCCTCTCAAGTCTTTCCNGTGCGCATGG[GT/AC*]CAGCCCTGGGCACCTGTNNNNNNNNNNNNNTGCTCATGGCCTTCTAGATTCCCAGGAAATGTCAGAGCTTTTCAAAGCCC
*</pre>
* are amplicon sequences resulting from running the tool. The flags (preceding the sequence itself) can be:
*
*<pre>
* Valid // amplicon is valid
* SITE_IS_FILTERED=1 // validation site is not marked 'PASS' or '.' in its filter field ("you are trying to validate a filtered variant")
* VARIANT_TOO_NEAR_PROBE=1 // there is a variant too near to the variant to be validated, potentially shifting the mass-spec peak
@ -72,10 +72,10 @@ import java.util.List;
* END_TOO_CLOSE, // variant is too close to the end of the amplicon region to give sequenom a good chance to find a suitable primer
* NO_VARIANTS_FOUND, // no variants found within the amplicon region
 * INDEL_OVERLAPS_VALIDATION_SITE, // an insertion or deletion interferes directly with the site to be validated (i.e. insertion directly preceding or following, or a deletion that spans the site itself)
* </p>
* </pre></p>
*
* <h2>Examples</h2>
* <pre></pre>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T ValidationAmplicons

View File

@ -55,7 +55,23 @@ import java.util.*;
*
* <h2>Output</h2>
* <p>
* Evaluation tables.
* Evaluation tables detailing the results of the eval modules which were applied.
* For example:
* <pre>
* output.eval.gatkreport:
* ##:GATKReport.v0.1 CountVariants : Counts different classes of variants in the sample
* CountVariants CompRod CpG EvalRod JexlExpression Novelty nProcessedLoci nCalledLoci nRefLoci nVariantLoci variantRate ...
* CountVariants dbsnp CpG eval none all 65900028 135770 0 135770 0.00206024 ...
* CountVariants dbsnp CpG eval none known 65900028 47068 0 47068 0.00071423 ...
* CountVariants dbsnp CpG eval none novel 65900028 88702 0 88702 0.00134601 ...
* CountVariants dbsnp all eval none all 65900028 330818 0 330818 0.00502000 ...
* CountVariants dbsnp all eval none known 65900028 120685 0 120685 0.00183133 ...
* CountVariants dbsnp all eval none novel 65900028 210133 0 210133 0.00318866 ...
* CountVariants dbsnp non_CpG eval none all 65900028 195048 0 195048 0.00295976 ...
* CountVariants dbsnp non_CpG eval none known 65900028 73617 0 73617 0.00111710 ...
* CountVariants dbsnp non_CpG eval none novel 65900028 121431 0 121431 0.00184265 ...
* ...
* </pre>
* </p>
*
* <h2>Examples</h2>
@ -149,12 +165,12 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
@Argument(shortName="mvq", fullName="mendelianViolationQualThreshold", doc="Minimum genotype QUAL score for each trio member required to accept a site as a violation", required=false)
protected double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 50;
@Argument(fullName="tranchesFile", shortName="tf", doc="The input tranches file describing where to cut the data", required=false)
private String TRANCHE_FILENAME = null;
@Argument(fullName="ancestralAlignments", shortName="aa", doc="Fasta file with ancestral alleles", required=false)
private File ancestralAlignmentsFile = null;
@Argument(fullName="requireStrictAlleleMatch", shortName="strict", doc="If provided only comp and eval tracks with exactly matching reference and alternate alleles will be counted as overlapping", required=false)
private boolean requireStrictAlleleMatch = false;
// Variables
private Set<SortableJexlVCMatchExp> jexlExpressions = new TreeSet<SortableJexlVCMatchExp>();
@ -226,16 +242,6 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
}
sampleNamesForStratification.add(ALL_SAMPLE_NAME);
// Add select expressions for anything in the tranches file
if ( TRANCHE_FILENAME != null ) {
// we are going to build a few select names automatically from the tranches file
for ( Tranche t : Tranche.readTranches(new File(TRANCHE_FILENAME)) ) {
logger.info("Adding select for all variant above the pCut of : " + t);
SELECT_EXPS.add(String.format(VariantRecalibrator.VQS_LOD_KEY + " >= %.2f", t.minVQSLod));
SELECT_NAMES.add(String.format("TS-%.2f", t.ts));
}
}
// Initialize select expressions
for (VariantContextUtils.JexlVCMatchExp jexl : VariantContextUtils.initializeMatchExps(SELECT_NAMES, SELECT_EXPS)) {
SortableJexlVCMatchExp sjexl = new SortableJexlVCMatchExp(jexl.name, jexl.exp);
@ -245,18 +251,13 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
// Initialize the set of stratifications and evaluations to use
stratificationObjects = variantEvalUtils.initializeStratificationObjects(this, NO_STANDARD_STRATIFICATIONS, STRATIFICATIONS_TO_USE);
Set<Class<? extends VariantEvaluator>> evaluationObjects = variantEvalUtils.initializeEvaluationObjects(NO_STANDARD_MODULES, MODULES_TO_USE);
boolean usingJEXL = false;
for ( VariantStratifier vs : getStratificationObjects() ) {
if ( vs.getClass().getSimpleName().equals("Filter") )
byFilterIsEnabled = true;
else if ( vs.getClass().getSimpleName().equals("Sample") )
perSampleIsEnabled = true;
usingJEXL = usingJEXL || vs.getClass().equals(JexlExpression.class);
}
if ( TRANCHE_FILENAME != null && ! usingJEXL )
throw new UserException.BadArgumentValue("tf", "Requires the JexlExpression ST to enabled");
// Initialize the evaluation contexts
evaluationContexts = variantEvalUtils.initializeEvaluationContexts(stratificationObjects, evaluationObjects, null, null);
@ -378,16 +379,16 @@ public class VariantEvalWalker extends RodWalker<Integer, Integer> implements Tr
if ( matchingComps.size() == 0 )
return null;
// find the comp which matches the alternate allele from eval
// find the comp which matches both the reference allele and alternate allele from eval
Allele altEval = eval.getAlternateAlleles().size() == 0 ? null : eval.getAlternateAllele(0);
for ( VariantContext comp : matchingComps ) {
Allele altComp = comp.getAlternateAlleles().size() == 0 ? null : comp.getAlternateAllele(0);
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp)) )
if ( (altEval == null && altComp == null) || (altEval != null && altEval.equals(altComp) && eval.getReference().equals(comp.getReference())) )
return comp;
}
// if none match, just return the first one
return matchingComps.get(0);
// if none match, just return the first one unless we require a strict match
return (requireStrictAlleleMatch ? null : matchingComps.get(0));
}
public Integer treeReduce(Integer lhs, Integer rhs) { return null; }

View File

@ -22,9 +22,6 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
@DataPoint(description = "number of eval SNP sites")
long nEvalVariants = 0;
@DataPoint(description = "number of comp SNP sites")
long nCompVariants = 0;
@DataPoint(description = "number of eval sites outside of comp sites")
long novelSites = 0;
@ -75,10 +72,9 @@ public class CompOverlap extends VariantEvaluator implements StandardEval {
}
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
boolean evalIsGood = eval != null && eval.isVariant();
boolean compIsGood = comp != null && comp.isNotFiltered() && (eval == null || comp.getType() == eval.getType());
boolean evalIsGood = eval != null && eval.isPolymorphic();
boolean compIsGood = comp != null && comp.isNotFiltered();
if (compIsGood) nCompVariants++; // count the number of comp events
if (evalIsGood) nEvalVariants++; // count the number of eval events
if (compIsGood && evalIsGood) {

View File

@ -100,21 +100,22 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
// So in order to maintain consistency with the previous implementation (and the intention of the original author), I've
// added in a proxy check for monomorphic status here.
// Protect against case when vc only as no-calls too - can happen if we strafity by sample and sample as a single no-call.
if ( !vc1.isVariant() || (vc1.hasGenotypes() && vc1.getHomRefCount() + vc1.getNoCallCount() == vc1.getNSamples()) ) {
if ( vc1.isMonomorphic() ) {
nRefLoci++;
} else {
switch (vc1.getType()) {
case NO_VARIATION:
// shouldn't get here
break;
case SNP:
nVariantLoci++;
nSNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
break;
case MNP:
nVariantLoci++;
nMNPs++;
if (vc1.getAttributeAsBoolean("ISSINGLETON")) nSingletons++;
if (vc1.getAttributeAsBoolean("ISSINGLETON", false)) nSingletons++;
break;
case INDEL:
nVariantLoci++;
@ -136,7 +137,7 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
String refStr = vc1.getReference().getBaseString().toUpperCase();
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE").toUpperCase() : null;
String aaStr = vc1.hasAttribute("ANCESTRALALLELE") ? vc1.getAttributeAsString("ANCESTRALALLELE", null).toUpperCase() : null;
// if (aaStr.equals(".")) {
// aaStr = refStr;
// }

View File

@ -219,7 +219,8 @@ public class GenotypePhasingEvaluator extends VariantEvaluator {
}
public static Double getPQ(Genotype gt) {
return gt.getAttributeAsDoubleNoException(ReadBackedPhasingWalker.PQ_KEY);
Double d = gt.getAttributeAsDouble(ReadBackedPhasingWalker.PQ_KEY, -1);
return d == -1 ? null : d;
}
public static boolean topMatchesTop(AllelePair b1, AllelePair b2) {

View File

@ -90,18 +90,19 @@ public class IndelLengthHistogram extends VariantEvaluator {
public int getComparisonOrder() { return 1; } // need only the evals
public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if ( ! vc1.isBiallelic() && vc1.isIndel() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
if ( vc1.isIndel() ) {
if ( vc1.isIndel() && vc1.isPolymorphic() ) {
if ( ! vc1.isBiallelic() ) {
//veWalker.getLogger().warn("[IndelLengthHistogram] Non-biallelic indel at "+ref.getLocus()+" ignored.");
return vc1.toString(); // biallelic sites are output
}
// only count simple insertions/deletions, not complex indels
if ( vc1.isSimpleInsertion() ) {
indelHistogram.update(vc1.getAlternateAllele(0).length());
} else if ( vc1.isSimpleDeletion() ) {
indelHistogram.update(-vc1.getReference().length());
} else {
throw new ReviewedStingException("Indel type that is not insertion or deletion.");
}
}

View File

@ -270,7 +270,7 @@ public class IndelStatistics extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (eval != null ) {
if (eval != null && eval.isPolymorphic()) {
if ( indelStats == null ) {
indelStats = new IndelStats(eval);
}

View File

@ -120,7 +120,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
if ( eval.hasGenotypes() )
ac = eval.getChromosomeCount(eval.getAlternateAllele(0));
else if ( eval.hasAttribute("AC") ) {
ac = Integer.valueOf(eval.getAttributeAsString("AC"));
ac = eval.getAttributeAsInt("AC", -1);
}
if ( ac != -1 ) {
@ -166,7 +166,7 @@ public class SimpleMetricsByAC extends VariantEvaluator implements StandardEval
}
}
if ( eval.isSNP() && eval.isBiallelic() && metrics != null ) {
if ( eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() && metrics != null ) {
metrics.incrValue(eval);
}
}

View File

@ -37,77 +37,74 @@ public class ThetaVariantEvaluator extends VariantEvaluator {
}
public String update1(VariantContext vc, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes()) {
if (vc == null || !vc.isSNP() || !vc.hasGenotypes() || vc.isMonomorphic()) {
return null; //no interesting sites
}
if (vc.hasGenotypes()) {
//this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
//this maps allele to a count
ConcurrentMap<String, Integer> alleleCounts = new ConcurrentHashMap<String, Integer>();
int numHetsHere = 0;
float numGenosHere = 0;
int numIndsHere = 0;
int numHetsHere = 0;
float numGenosHere = 0;
int numIndsHere = 0;
for (Genotype genotype : vc.getGenotypes().values()) {
numIndsHere++;
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
for (Genotype genotype : vc.getGenotypes().values()) {
numIndsHere++;
if (!genotype.isNoCall()) {
//increment stats for heterozygosity
if (genotype.isHet()) {
numHetsHere++;
}
numGenosHere++;
//increment stats for pairwise mismatches
numGenosHere++;
//increment stats for pairwise mismatches
for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
for (Allele allele : genotype.getAlleles()) {
if (allele.isNonNull() && allele.isCalled()) {
String alleleString = allele.toString();
alleleCounts.putIfAbsent(alleleString, 0);
alleleCounts.put(alleleString, alleleCounts.get(alleleString) + 1);
}
}
}
if (numGenosHere > 0) {
//only if have one called genotype at least
this.numSites++;
}
if (numGenosHere > 0) {
//only if have one called genotype at least
this.numSites++;
this.totalHet += numHetsHere / numGenosHere;
this.totalHet += numHetsHere / numGenosHere;
//compute based on num sites
float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i;
}
this.thetaRegionNumSites += 1.0 / harmonicFactor;
//compute based on num sites
float harmonicFactor = 0;
for (int i = 1; i <= numIndsHere; i++) {
harmonicFactor += 1.0 / i;
}
this.thetaRegionNumSites += 1.0 / harmonicFactor;
//now compute pairwise mismatches
float numPairwise = 0;
float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1);
//now compute pairwise mismatches
float numPairwise = 0;
float numDiffs = 0;
for (String allele1 : alleleCounts.keySet()) {
int allele1Count = alleleCounts.get(allele1);
for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) {
continue;
}
if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5;
for (String allele2 : alleleCounts.keySet()) {
if (allele1.compareTo(allele2) < 0) {
continue;
}
if (allele1 .compareTo(allele2) == 0) {
numPairwise += allele1Count * (allele1Count - 1) * .5;
}
else {
int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count;
}
}
else {
int allele2Count = alleleCounts.get(allele2);
numPairwise += allele1Count * allele2Count;
numDiffs += allele1Count * allele2Count;
}
}
}
if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise;
}
if (numPairwise > 0) {
this.totalAvgDiffs += numDiffs / numPairwise;
}
}

View File

@ -40,7 +40,7 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
}
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
if (vc != null && vc.isSNP() && vc.isBiallelic() && vc.isPolymorphic()) {
if (VariantContextUtils.isTransition(vc)) {
if (updateStandard) nTiInComp++;
else nTi++;
@ -49,18 +49,14 @@ public class TiTvVariantEvaluator extends VariantEvaluator implements StandardEv
else nTv++;
}
String refStr = vc.getReference().getBaseString().toUpperCase();
String aaStr = vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase();
if (aaStr != null && !aaStr.equalsIgnoreCase("null") && !aaStr.equals(".")) {
BaseUtils.BaseSubstitutionType aaSubType = BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0]);
//System.out.println(refStr + " " + vc.getAttributeAsString("ANCESTRALALLELE").toUpperCase() + " " + aaSubType);
if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSITION) {
nTiDerived++;
} else if (aaSubType == BaseUtils.BaseSubstitutionType.TRANSVERSION) {
nTvDerived++;
if (vc.hasAttribute("ANCESTRALALLELE")) {
final String aaStr = vc.getAttributeAsString("ANCESTRALALLELE", "null").toUpperCase();
if ( ! aaStr.equals(".") ) {
switch ( BaseUtils.SNPSubstitutionType(aaStr.getBytes()[0], vc.getAlternateAllele(0).getBases()[0] ) ) {
case TRANSITION: nTiDerived++; break;
case TRANSVERSION: nTvDerived++; break;
default: break;
}
}
}
}

View File

@ -117,7 +117,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
public SiteStatus calcSiteStatus(VariantContext vc) {
if ( vc == null ) return SiteStatus.NO_CALL;
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
if ( ! vc.isVariant() ) return SiteStatus.MONO;
if ( vc.isMonomorphic() ) return SiteStatus.MONO;
if ( vc.hasGenotypes() ) return SiteStatus.POLY; // must be polymorphic if isMonomorphic was false and there are genotypes
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
int ac = 0;
@ -130,10 +131,8 @@ public class ValidationReport extends VariantEvaluator implements StandardEval {
//// System.out.printf(" ac = %d%n", ac);
}
else
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY, 0);
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
} else if ( vc.hasGenotypes() ) {
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
} else {
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
//return SiteStatus.NO_CALL; // we can't figure out what to do

View File

@ -232,7 +232,7 @@ public class VariantQualityScore extends VariantEvaluator {
public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null;
if( eval != null && eval.isSNP() && eval.isBiallelic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphic() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
if( titvStats == null ) { titvStats = new TiTvStats(); }
titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));

View File

@ -44,7 +44,7 @@ public class AlleleCount extends VariantStratifier {
if (eval != null) {
int AC = -1;
if ( eval.hasAttribute("AC") && eval.getAttribute("AC") instanceof Integer ) {
AC = eval.getAttributeAsInt("AC");
AC = eval.getAttributeAsInt("AC", 0);
} else if ( eval.isVariant() ) {
for (Allele allele : eval.getAlternateAlleles())
AC = Math.max(AC, eval.getChromosomeCount(allele));

View File

@ -28,7 +28,7 @@ public class AlleleFrequency extends VariantStratifier {
if (eval != null) {
try {
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF") / 5.0, 3))));
relevantStates.add(String.format("%.3f", (5.0 * MathUtils.round(eval.getAttributeAsDouble("AF", 0.0) / 5.0, 3))));
} catch (Exception e) {
return relevantStates;
}

View File

@ -90,8 +90,8 @@ public class Degeneracy extends VariantStratifier {
Integer frame = null;
if (eval.hasAttribute("refseq.functionalClass")) {
aa = eval.getAttributeAsString("refseq.variantAA");
frame = eval.getAttributeAsInt("refseq.frame");
aa = eval.getAttributeAsString("refseq.variantAA", null);
frame = eval.getAttributeAsInt("refseq.frame", 0);
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -99,7 +99,7 @@ public class Degeneracy extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
String newtype = eval.getAttributeAsString(key, null);
if ( newtype != null &&
( type == null ||
@ -109,13 +109,13 @@ public class Degeneracy extends VariantStratifier {
type = newtype;
String aakey = String.format("refseq.variantAA_%d", annotationId);
aa = eval.getAttributeAsString(aakey);
aa = eval.getAttributeAsString(aakey, null);
if (aa != null) {
String framekey = String.format("refseq.frame_%d", annotationId);
if (eval.hasAttribute(framekey)) {
frame = eval.getAttributeAsInt(framekey);
frame = eval.getAttributeAsInt(framekey, 0);
}
}
}

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
@ -11,25 +12,34 @@ import java.util.List;
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
*/
public class FunctionalClass extends VariantStratifier {
@Override
public void initialize() {
states.add("all");
states.add("silent");
states.add("missense");
states.add("nonsense");
public enum FunctionalType {
silent,
missense,
nonsense
}
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
@Override
public void initialize() {
states.add("all");
for ( FunctionalType type : FunctionalType.values() )
states.add(type.name());
}
public List<String> getRelevantStates(ReferenceContext ref, RefMetaDataTracker tracker, VariantContext comp, String compName, VariantContext eval, String evalName, String sampleName) {
ArrayList<String> relevantStates = new ArrayList<String>();
relevantStates.add("all");
if (eval != null && eval.isVariant()) {
String type = null;
FunctionalType type = null;
if (eval.hasAttribute("refseq.functionalClass")) {
type = eval.getAttributeAsString("refseq.functionalClass");
try {
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass", null));
} catch ( Exception e ) {} // don't error out if the type isn't supported
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -37,24 +47,36 @@ public class FunctionalClass extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
( type == null ||
( type.equals("silent") && !newtype.equals("silent") ) ||
( type.equals("missense") && newtype.equals("nonsense") ) )
) {
type = newtype;
String newtypeStr = eval.getAttributeAsString(key, null);
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
try {
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
if ( type == null ||
( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
type = newType;
}
} catch ( Exception e ) {} // don't error out if the type isn't supported
}
annotationId++;
} while (eval.hasAttribute(key));
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) {
try {
SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString());
if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE )
type = FunctionalType.nonsense;
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE )
type = FunctionalType.missense;
else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT )
type = FunctionalType.silent;
}
catch ( Exception e ) {} // don't error out if the type isn't supported
}
if (type != null) {
if (type.equals("silent")) { relevantStates.add("silent"); }
else if (type.equals("missense")) { relevantStates.add("missense"); }
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
if ( type != null ) {
relevantStates.add(type.name());
}
}

View File

@ -277,7 +277,7 @@ public class VariantEvalUtils {
* @return a new VariantContext with just the requested samples
*/
public VariantContext getSubsetOfVariantContext(VariantContext vc, Collection<String> sampleNames) {
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values());
VariantContext vcsub = vc.subContextFromGenotypes(vc.getGenotypes(sampleNames).values(), vc.getAlleles());
HashMap<String, Object> newAts = new HashMap<String, Object>(vcsub.getAttributes());
@ -354,7 +354,7 @@ public class VariantEvalUtils {
private void addMapping(HashMap<String, Set<VariantContext>> mappings, String sample, VariantContext vc) {
if ( !mappings.containsKey(sample) )
mappings.put(sample, new HashSet<VariantContext>());
mappings.put(sample, new LinkedHashSet<VariantContext>());
mappings.get(sample).add(vc);
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2011 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
/**
* Created by IntelliJ IDEA.
* User: rpoplin
* Date: 3/12/11
*/
public class TrainingSet {

    // Public mutable fields are read directly by the variant recalibration walkers;
    // keeping them public (not accessor-wrapped) preserves the existing caller interface.
    public RodBinding<VariantContext> rodBinding;
    public boolean isKnown = false;
    public boolean isTraining = false;
    public boolean isAntiTraining = false;
    public boolean isTruth = false;
    public boolean isConsensus = false;
    public double prior = 0.0;

    protected final static Logger logger = Logger.getLogger(TrainingSet.class);

    /**
     * Wraps a ROD binding for a VCF track and parses its command-line tags to decide
     * how the track participates in variant recalibration (known / training / truth /
     * bad-sites / consensus) and with what prior.
     *
     * Recognized tags: "known", "training", "bad", "truth", "consensus" (each set when
     * the tag value is exactly the string "true") and "prior" (parsed as a double;
     * a malformed value propagates NumberFormatException to the caller, as before).
     * The detected properties are logged so the user can confirm the configuration.
     *
     * @param rodBinding the bound track whose tags are inspected; assumed non-null
     *                   (NOTE(review): no null check here — confirm callers guarantee it)
     */
    public TrainingSet( final RodBinding<VariantContext> rodBinding ) {
        this.rodBinding = rodBinding;
        final Tags tags = rodBinding.getTags();
        final String name = rodBinding.getName();

        // Parse the tags to decide which tracks have which properties
        if( tags != null ) {
            isKnown = hasTrueTag(tags, "known");
            isTraining = hasTrueTag(tags, "training");
            isAntiTraining = hasTrueTag(tags, "bad");
            isTruth = hasTrueTag(tags, "truth");
            isConsensus = hasTrueTag(tags, "consensus");
            prior = ( tags.containsKey("prior") ? Double.parseDouble(tags.getValue("prior")) : prior );
        }

        // Report back to the user which tracks were found and the properties that were detected
        if( !isConsensus && !isAntiTraining ) {
            logger.info( String.format( "Found %s track: \tKnown = %s \tTraining = %s \tTruth = %s \tPrior = Q%.1f", name, isKnown, isTraining, isTruth, prior) );
        } else if( isConsensus ) {
            logger.info( String.format( "Found consensus track: %s", name) );
        } else {
            logger.info( String.format( "Found bad sites training track: %s", name) );
        }
    }

    /**
     * True iff {@code key} is present in {@code tags} and its value is exactly "true".
     * Case-sensitive comparison, matching the original inline checks.
     */
    private static boolean hasTrueTag( final Tags tags, final String key ) {
        return tags.containsKey(key) && tags.getValue(key).equals("true");
    }
}

Some files were not shown because too many files have changed in this diff Show More