Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/stable

This commit is contained in:
Matt Hanna 2011-11-16 09:57:13 -05:00
commit 6a5d5e7ac9
375 changed files with 13496 additions and 9533 deletions

20
.gitignore vendored 100644
View File

@ -0,0 +1,20 @@
/*.bam
/*.bai
/*.bed
*.idx
*~
/*.vcf
/*.txt
/*.csh
/.*
/*.pdf
/*.eval
*.ipr
*.iws
*.iml
.DS_Store
queueScatterGather
/foo*
/bar*
integrationtests/
public/testdata/onTheFlyOutputTest.vcf

156
build.xml
View File

@ -28,6 +28,8 @@
<property name="build.dir" value="build" /> <property name="build.dir" value="build" />
<property name="dist.dir" value="dist" /> <property name="dist.dir" value="dist" />
<property name="contract.dump.dir" value="dump" />
<property name="pipelinetest.dir" value="pipelinetests" />
<property name="lib.dir" value="lib" /> <property name="lib.dir" value="lib" />
<property name="external.dir" value="external" /> <property name="external.dir" value="external" />
<property name="public.dir" value="public" /> <property name="public.dir" value="public" />
@ -35,18 +37,26 @@
<property name="java.public.source.dir" value="${public.dir}/java/src" /> <property name="java.public.source.dir" value="${public.dir}/java/src" />
<property name="java.private.source.dir" value="${private.dir}/java/src" /> <property name="java.private.source.dir" value="${private.dir}/java/src" />
<property name="java.classes" value="${build.dir}/java/classes" /> <property name="java.classes" value="${build.dir}/java/classes" />
<property name="R.public.scripts.dir" value="${public.dir}/R/scripts" />
<property name="R.private.scripts.dir" value="${private.dir}/R/scripts" />
<property name="R.public.src.dir" value="${public.dir}/R/src" />
<!-- Legacy: Installing libraries back into the source directory
instead of the build or dist directory... intentionally avoids ant clean?? -->
<property name="R.library.dir" value="${public.dir}/R" />
<property name="R.tar.dir" value="${build.dir}/R/src" />
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
<property name="resource.file" value="StingText.properties" /> <property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" /> <property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="scala.public.source.dir" value="${public.dir}/scala/src" /> <property name="scala.public.source.dir" value="${public.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" /> <property name="scala.private.source.dir" value="${private.dir}/scala/src" />
<property name="scala.classes" value="${build.dir}/scala/classes" /> <property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" /> <property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
<property name="javadoc.dir" value="javadoc" /> <property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" /> <property name="scaladoc.dir" value="scaladoc" />
<!-- Contracts for Java --> <!-- Contracts for Java -->
<!-- By default, enabled only for test targets --> <!-- By default, enabled only for test targets -->
<!-- To disable for test targets, run with -Duse.contracts=false --> <!-- To disable for test targets, run with -Duse.contracts=false -->
@ -60,7 +70,7 @@
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') --> <!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" /> <property name="halt" value="yes" />
<!-- should our unit test output go to a file or the screen? <!-- should our unit test output go to a file or the screen?
false means it goes to the screen (default) true to file --> false means it goes to the screen (default) true to file -->
<property name="usefile" value="false" /> <property name="usefile" value="false" />
@ -82,7 +92,7 @@
<patternset refid="java.source.pattern" /> <patternset refid="java.source.pattern" />
</fileset> </fileset>
<!-- terrible hack to get gatkdocs to see all files --> <!-- terrible hack to get gatkdocs to see all files -->
<patternset id="all.java.source.pattern"> <patternset id="all.java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" /> <include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" /> <include name="${java.private.source.dir}/**/*.java" />
@ -113,7 +123,7 @@
<exclude name="testng*.jar" /> <exclude name="testng*.jar" />
<exclude name="bcel*.jar" /> <exclude name="bcel*.jar" />
</patternset> </patternset>
<path id="external.dependencies"> <path id="external.dependencies">
<fileset dir="${lib.dir}"> <fileset dir="${lib.dir}">
<patternset refid="dependency.mask" /> <patternset refid="dependency.mask" />
@ -154,16 +164,18 @@
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/> <property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
<property name="ivy.settings.dir" value="settings"/> <property name="ivy.settings.dir" value="settings"/>
<property file="${ivy.settings.dir}/ivysettings.properties"/> <property file="${ivy.settings.dir}/ivysettings.properties"/>
<mkdir dir="${lib.dir}"/> <mkdir dir="${lib.dir}"/>
<mkdir dir="${ivy.jar.dir}"/> <mkdir dir="${ivy.jar.dir}"/>
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}" <get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}" dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/> usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml" <taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant" uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/> classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/> <ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
<property name="init.resolve.done" value="true"/> <property name="init.resolve.done" value="true"/>
</target> </target>
@ -209,11 +221,11 @@
<equals arg1="${git.describe.exit.value}" arg2="0" /> <equals arg1="${git.describe.exit.value}" arg2="0" />
</condition> </condition>
</target> </target>
<target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded"> <target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded">
<property name="build.version" value="${git.describe.output}" /> <property name="build.version" value="${git.describe.output}" />
</target> </target>
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded"> <target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false"> <exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
<arg line="rev-parse HEAD" /> <arg line="rev-parse HEAD" />
@ -222,11 +234,11 @@
<equals arg1="${git.rev-parse.exit.value}" arg2="0" /> <equals arg1="${git.rev-parse.exit.value}" arg2="0" />
</condition> </condition>
</target> </target>
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded"> <target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
<property name="build.version" value="${git.rev-parse.output}" /> <property name="build.version" value="${git.rev-parse.output}" />
</target> </target>
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version"> <target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
<!-- Set build.version to exported if no other value has been set --> <!-- Set build.version to exported if no other value has been set -->
<property name="build.version" value="exported" /> <property name="build.version" value="exported" />
@ -264,7 +276,7 @@
<echo message="Scala build : ${scala.target}"/> <echo message="Scala build : ${scala.target}"/>
<echo message="source revision : ${build.version}"/> <echo message="source revision : ${build.version}"/>
<echo message="build time : ${build.timestamp}" /> <echo message="build time : ${build.timestamp}" />
<condition property="include.private"> <condition property="include.private">
<equals arg1="${gatk.target}" arg2="private" casesensitive="false" /> <equals arg1="${gatk.target}" arg2="private" casesensitive="false" />
</condition> </condition>
@ -310,13 +322,13 @@
<target name="gatk.compile.public.source" depends="init,resolve"> <target name="gatk.compile.public.source" depends="init,resolve">
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}"> <javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
</javac> </javac>
</target> </target>
<target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private"> <target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private">
<javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}"> <javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
</javac> </javac>
</target> </target>
<target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source"> <target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source">
@ -325,11 +337,11 @@
<property name="dist.dir" value="${external.dist.dir}" /> <property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" /> <property name="gatk.classpath" value="${external.gatk.classpath}" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>
<target name="gatk.compile.source" <target name="gatk.compile.source"
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
description="compile the GATK source" /> description="compile the GATK source" />
<target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts"> <target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts">
@ -339,9 +351,9 @@
<pathelement path="${java.classes}" /> <pathelement path="${java.classes}" />
</classpath> </classpath>
<compilerarg value="-Acom.google.java.contract.debug"/> <compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/> <compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/> <compilerarg value="-proc:only"/>
</javac> </javac>
</target> </target>
<target name="check.contracts.private" depends="gatk.contracts.public"> <target name="check.contracts.private" depends="gatk.contracts.public">
@ -360,14 +372,14 @@
<pathelement path="${java.classes}" /> <pathelement path="${java.classes}" />
</classpath> </classpath>
<compilerarg value="-Acom.google.java.contract.debug"/> <compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/> <compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/> <compilerarg value="-proc:only"/>
</javac> </javac>
</target> </target>
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private" <target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
description="create GATK contracts" if="include.contracts" /> description="create GATK contracts" if="include.contracts" />
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" /> <target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
<target name="init.queue-extensions.generate" depends="gatk.compile"> <target name="init.queue-extensions.generate" depends="gatk.compile">
@ -411,9 +423,9 @@
<src path="${scala.public.source.dir}" /> <src path="${scala.public.source.dir}" />
<src path="${queue-extensions.source.dir}" /> <src path="${queue-extensions.source.dir}" />
<include name="**/*.scala"/> <include name="**/*.scala"/>
</scalac> </scalac>
</target> </target>
<target name="check.scala.private" depends="scala.compile.public"> <target name="check.scala.private" depends="scala.compile.public">
<condition property="include.scala.private"> <condition property="include.scala.private">
<and> <and>
@ -422,12 +434,12 @@
</and> </and>
</condition> </condition>
</target> </target>
<target name="scala.compile.private" depends="check.scala.private" if="include.scala.private"> <target name="scala.compile.private" depends="check.scala.private" if="include.scala.private">
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes"> <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.private.source.dir}" /> <src path="${scala.private.source.dir}" />
<include name="**/*.scala"/> <include name="**/*.scala"/>
</scalac> </scalac>
</target> </target>
<target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" /> <target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" />
@ -530,6 +542,11 @@
<target name="sting.compile" depends="gatk.compile, scala.compile" /> <target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="R.public.tar">
<mkdir dir="${R.tar.dir}/${R.package.path}" />
<tar compression="gzip" basedir="${R.public.src.dir}/${R.package.path}" includes="gsalib/**" destfile="${R.tar.dir}/${R.package.path}/gsalib.tar.gz" />
</target>
<target name="init.jar" depends="sting.compile,extracthelp"> <target name="init.jar" depends="sting.compile,extracthelp">
<mkdir dir="${dist.dir}"/> <mkdir dir="${dist.dir}"/>
<copy todir="${dist.dir}"> <copy todir="${dist.dir}">
@ -537,7 +554,7 @@
</copy> </copy>
</target> </target>
<target name="sting-utils.jar" depends="gatk.compile, init.jar"> <target name="sting-utils.jar" depends="gatk.compile, init.jar, R.public.tar">
<jar jarfile="${dist.dir}/StingUtils.jar"> <jar jarfile="${dist.dir}/StingUtils.jar">
<fileset dir="${java.classes}"> <fileset dir="${java.classes}">
<include name="**/utils/**/*.class"/> <include name="**/utils/**/*.class"/>
@ -549,6 +566,15 @@
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/> <fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/> <fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/> <fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
<fileset dir="${R.tar.dir}">
<include name="**/${R.package.path}/**/*.tar.gz"/>
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/utils/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/utils/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" /> <attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" />
</manifest> </manifest>
@ -577,6 +603,14 @@
<include name="**/gatk/**/*.class" /> <include name="**/gatk/**/*.class" />
<include name="**/alignment/**/*.class"/> <include name="**/alignment/**/*.class"/>
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" /> <attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
</manifest> </manifest>
@ -591,6 +625,14 @@
<include name="**/analyzecovariates/**/*.class" /> <include name="**/analyzecovariates/**/*.class" />
<include name="**/gatk/walkers/recalibration/*.class" /> <include name="**/gatk/walkers/recalibration/*.class" />
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" /> <attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
</manifest> </manifest>
@ -603,28 +645,7 @@
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" /> <fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant> </subant>
</target> </target>
<!--
<target name="gatk.oneoffs.jar" depends="gatk.compile, init.jar"
description="generate the GATK oneoffs distribution" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.oneoffprojects.tools.CompareBAMAlignments" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.SliceBams" />
</manifest>
</jar>
</target>
-->
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include"> <target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
<jar jarfile="${dist.dir}/GATKScala.jar"> <jar jarfile="${dist.dir}/GATKScala.jar">
<fileset dir="${scala.classes}"> <fileset dir="${scala.classes}">
@ -641,6 +662,12 @@
<fileset dir="${java.classes}"> <fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class" /> <include name="org/broadinstitute/sting/queue/**/*.class" />
</fileset> </fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<manifest> <manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" /> <attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
</manifest> </manifest>
@ -680,20 +707,7 @@
</jar> </jar>
</target> </target>
<!--
<target name="gatk.oneoffs.manifests" depends="gatk.oneoffs.jar, init.manifests" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
-->
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include"> <target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
<jar jarfile="${dist.dir}/Queue.jar" update="true" > <jar jarfile="${dist.dir}/Queue.jar" update="true" >
<manifest> <manifest>
@ -778,10 +792,6 @@
<pathelement location="${testng.jar}"/> <pathelement location="${testng.jar}"/>
</classpath> </classpath>
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac> </javac>
</target> </target>
@ -798,10 +808,6 @@
<pathelement location="${testng.jar}"/> <pathelement location="${testng.jar}"/>
</classpath> </classpath>
<compilerarg value="-proc:none"/> <compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac> </javac>
</target> </target>
@ -849,6 +855,9 @@
<pathelement location="${java.private.test.classes}" /> <pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" /> <pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" /> <pathelement location="${scala.private.test.classes}" />
<pathelement location="${R.tar.dir}" />
<pathelement location="${R.public.scripts.dir}" />
<pathelement location="${R.private.scripts.dir}" />
</path> </path>
<path id="testng.gatk.releasetest.classpath"> <path id="testng.gatk.releasetest.classpath">
@ -1185,19 +1194,18 @@
</target> </target>
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs"> <target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<delete dir="out"/>
<delete dir="${build.dir}"/> <delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/> <delete dir="${lib.dir}"/>
<delete dir="dump"/> <delete dir="${contract.dump.dir}"/>
<delete dir="${staging.dir}"/> <delete dir="${staging.dir}"/>
<delete dir="${dist.dir}"/> <delete dir="${dist.dir}"/>
<delete dir="pipelinetests"/> <delete dir="${pipelinetest.dir}"/>
</target> </target>
<!-- Build gsalib R module --> <!-- Build gsalib R module -->
<target name="gsalib"> <target name="gsalib">
<exec executable="R" failonerror="true"> <exec executable="R" failonerror="true">
<arg line="R CMD INSTALL -l public/R/ public/R/src/gsalib/" /> <arg line="R CMD INSTALL -l ${R.library.dir} ${R.public.src.dir}/${R.package.path}/gsalib" />
</exec> </exec>
</target> </target>
</project> </project>

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
verbose = TRUE verbose = TRUE
@ -47,6 +49,9 @@ if( is.numeric(c$Covariate) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot mean quality versus the covariate # Plot mean quality versus the covariate
@ -69,6 +74,10 @@ if( is.numeric(c$Covariate) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot histogram of the covariate # Plot histogram of the covariate
# #
@ -106,3 +115,7 @@ if( is.numeric(c$Covariate) ) {
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
input = args[1] input = args[1]
@ -33,6 +35,10 @@ points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
abline(0,1, lty=2) abline(0,1, lty=2)
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot Q empirical histogram # Plot Q empirical histogram
# #
@ -52,6 +58,10 @@ points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
# #
# Plot Q reported histogram # Plot Q reported histogram
# #
@ -68,3 +78,7 @@ plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yM
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1") points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F)) axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript #!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE) args <- commandArgs(TRUE)
verbose = TRUE verbose = TRUE
@ -85,3 +87,7 @@ if ( ! is.null(sensitivity) ) {
} }
dev.off() dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -12,20 +12,20 @@ if ( onCMDLine ) {
inputFileName = args[1] inputFileName = args[1]
outputPDF = args[2] outputPDF = args[2]
} else { } else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt" inputFileName = "~/Desktop/broadLocal/GATK/unstable/wgs.jobreport.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt" #inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA outputPDF = NA
} }
RUNTIME_UNITS = "(sec)" RUNTIME_UNITS = "(hours)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000 ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
# #
# Helper function to aggregate all of the jobs in the report across all tables # Helper function to aggregate all of the jobs in the report across all tables
# #
allJobsFromReport <- function(report) { allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts") names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime")
sub <- lapply(report, function(table) table[,names]) sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub) do.call("rbind", sub)
} }
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
# #
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job # Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
# #
plotJobsGantt <- function(gatkReport, sortOverall) { plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
allJobs = allJobsFromReport(gatkReport) allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) { if ( sortOverall ) {
title = "All jobs, by analysis, by start time" title = "All jobs, by analysis, by start time"
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
} }
allJobs$index = 1:nrow(allJobs) allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime) minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts) allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime) maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName)) p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm"))) p <- p + theme_bw()
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2) p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
if ( includeText )
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1) p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS)) p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS))
p <- p + ylab("Job") p <- p + ylab("Job number")
p <- p + opts(title=title) p <- p + opts(title=title)
print(p) print(p)
} }
@ -119,7 +121,7 @@ plotGroup <- function(groupTable) {
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) { if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations? # todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram() p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs") p <- p + xlab(paste("runtime", RUNTIME_UNITS)) + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name)) p <- p + opts(title=paste("Job runtime histogram for", name))
print(p) print(p)
} }
@ -139,9 +141,9 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) { convertUnits <- function(gatkReportData) {
convertGroup <- function(g) { convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g g
} }
lapply(gatkReportData, convertGroup) lapply(gatkReportData, convertGroup)
@ -157,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11) pdf(outputPDF, height=8.5, width=11)
} }
plotJobsGantt(gatkReportData, T) plotJobsGantt(gatkReportData, T, F)
plotJobsGantt(gatkReportData, F) plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData) plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) { for ( group in gatkReportData ) {
plotGroup(group) plotGroup(group)

View File

@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv); .gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
} }
gatkreport = as.list(tableEnv); gatkreport = as.list(tableEnv, all.names=TRUE);
} }

View File

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment; package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils; import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator; import java.util.Iterator;
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of reads aligned by this map (aka 1). * @return Number of reads aligned by this map (aka 1).
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName())); //logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases(); byte[] bases = read.getReadBases();

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName; import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File; import java.io.File;
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of alignments found for this read. * @return Number of alignments found for this read.
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
SAMRecord alignedRead = aligner.align(read,header); SAMRecord alignedRead = aligner.align(read,header);
out.addAlignment(alignedRead); out.addAlignment(alignedRead);
return 1; return 1;

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment; package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration; import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles; import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner; import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream; import java.io.PrintStream;
import java.util.Iterator; import java.util.Iterator;
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
/** /**
* Aligns a read to the given reference. * Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null. * @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align. * @param read Read to align.
* @return Number of alignments found for this read. * @return Number of alignments found for this read.
*/ */
@Override @Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator(); Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
if(alignmentIterator.hasNext()) { if(alignmentIterator.hasNext()) {
int numAlignments = alignmentIterator.next().length; int numAlignments = alignmentIterator.next().length;

View File

@ -25,6 +25,9 @@
package org.broadinstitute.sting.analyzecovariates; package org.broadinstitute.sting.analyzecovariates;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.CommandLineProgram; import org.broadinstitute.sting.commandline.CommandLineProgram;
@ -33,14 +36,16 @@ import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection; import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor; import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager; import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException; import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*; import java.io.*;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection; import java.util.Collection;
import java.util.Map; import java.util.Map;
import java.util.regex.Pattern; import java.util.regex.Pattern;
@ -71,15 +76,13 @@ import java.util.regex.Pattern;
* </ul> * </ul>
* *
* <p> * <p>
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options * NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R, * See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
* *
* <p> * <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots. * See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration * <a target="gatkwiki" href="http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration"
* >http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration</a>
* *
* <h2>Input</h2> * <h2>Input</h2>
* <p> * <p>
@ -91,7 +94,6 @@ import java.util.regex.Pattern;
* java -Xmx4g -jar AnalyzeCovariates.jar \ * java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \ * -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \ * -outputDir /path/to/output_dir/ \
* -resources resources/ \
* -ignoreQ 5 * -ignoreQ 5
* </pre> * </pre>
* *
@ -101,6 +103,11 @@ import java.util.regex.Pattern;
groupName = "AnalyzeCovariates", groupName = "AnalyzeCovariates",
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator") summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram { public class AnalyzeCovariates extends CommandLineProgram {
final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class);
private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R";
private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R";
private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R";
///////////////////////////// /////////////////////////////
// Command Line Arguments // Command Line Arguments
@ -114,11 +121,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false) @Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv"; private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false) @Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/"; private File OUTPUT_DIR = new File("analyzeCovariates");
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false) @Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5; private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false) @Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
@ -154,29 +157,26 @@ public class AnalyzeCovariates extends CommandLineProgram {
protected int execute() { protected int execute() {
// create the output directory where all the data tables and plots will go // create the output directory where all the data tables and plots will go
try { if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs())
Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR); throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to create output directory: " + OUTPUT_DIR);
} catch (IOException e) {
System.out.println("Couldn't create directory: " + OUTPUT_DIR); if (!RScriptExecutor.RSCRIPT_EXISTS)
System.out.println("User is responsible for making sure the output directory exists."); Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated.");
}
if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
// initialize all the data from the csv file and allocate the list of covariates // initialize all the data from the csv file and allocate the list of covariates
System.out.println("Reading in input csv file..."); logger.info("Reading in input csv file...");
initializeData(); initializeData();
System.out.println("...Done!"); logger.info("...Done!");
// output data tables for Rscript to read in // output data tables for Rscript to read in
System.out.println("Writing out intermediate tables for R..."); logger.info("Writing out intermediate tables for R...");
writeDataTables(); writeDataTables();
System.out.println("...Done!"); logger.info("...Done!");
// perform the analysis using Rscript and output the plots // perform the analysis using Rscript and output the plots
System.out.println("Calling analysis R scripts and writing out figures..."); logger.info("Calling analysis R scripts and writing out figures...");
callRScripts(); callRScripts();
System.out.println("...Done!"); logger.info("...Done!");
return 0; return 0;
} }
@ -287,37 +287,40 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) { if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
String readGroup = readGroupKey.toString(); String readGroup = readGroupKey.toString();
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey); RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" ); logger.info(String.format(
System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported())); "Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
readGroup, readGroupDatum.getNumObservations(),
readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
// for each covariate // for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii); Covariate cov = requestedCovariates.get(iii);
// Create a PrintStream // Create a PrintStream
PrintStream output = null; File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
PrintStream output;
try { try {
output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat")); output = new PrintStream(FileUtils.openOutputStream(outputFile));
} catch (IOException e) {
} catch (FileNotFoundException e) { throw new UserException.CouldNotCreateOutputFile(outputFile, e);
System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
System.exit(-1);
} }
// Output the header try {
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases"); // Output the header
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) { for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
output.print( covariateKey.toString() + "\t" ); // Covariate output.print( covariateKey.toString() + "\t" ); // Covariate
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey); RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
output.println( thisDatum.getNumObservations() ); // nBases output.println( thisDatum.getNumObservations() ); // nBases
}
} finally {
// Close the PrintStream
IOUtils.closeQuietly(output);
} }
// Close the PrintStream
output.close();
} }
} else { } else {
break; break;
@ -327,10 +330,6 @@ public class AnalyzeCovariates extends CommandLineProgram {
} }
private void callRScripts() { private void callRScripts() {
RScriptExecutor.RScriptArgumentCollection argumentCollection =
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0; int numReadGroups = 0;
// for each read group // for each read group
@ -338,23 +337,32 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) { if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString(); String readGroup = readGroupKey.toString();
System.out.println("Analyzing read group: " + readGroup); logger.info("Analyzing read group: " + readGroup);
// for each covariate // for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) { for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii); Covariate cov = requestedCovariates.get(iii);
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"; final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
if (DO_INDEL_QUALITY) { if (DO_INDEL_QUALITY) {
executor.callRScripts("plot_indelQuality.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
} else { } else {
if( iii == 1 ) { if( iii == 1 ) {
// Analyze reported quality // Analyze reported quality
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class));
// The second argument is the Q scores that should be turned pink in the plot because they were ignored
executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE);
executor.exec();
} else { // Analyze all other covariates } else { // Analyze all other covariates
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename, RScriptExecutor executor = new RScriptExecutor();
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
} }
} }
} }

View File

@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
/** /**
* Maps indices of command line arguments to values paired with that argument. * Maps indices of command line arguments to values paired with that argument.
*/ */
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>(); public final SortedMap<ArgumentMatchSite,List<String>> sites = new TreeMap<ArgumentMatchSite,List<String>>();
/** /**
* An ordered, freeform collection of tags. * An ordered, freeform collection of tags.
@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
} }
/** /**
* A simple way of indicating that an argument with the given label and definition exists at this index. * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null. * @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null. * @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null. * @param site Position of the argument. Must not be null.
* @param tags ordered freeform text tags associated with this argument. * @param tags ordered freeform text tags associated with this argument.
*/ */
public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) { public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) {
this( label, definition, index, null, tags ); this( label, definition, site, null, tags );
} }
/** /**
* A simple way of indicating that an argument with the given label and definition exists at this index. * A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null. * @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null. * @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null. * @param site Position of the argument. Must not be null.
* @param value Value for the argument at this position. * @param value Value for the argument at this position.
* @param tags ordered freeform text tags associated with this argument. * @param tags ordered freeform text tags associated with this argument.
*/ */
private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) { private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) {
this.label = label; this.label = label;
this.definition = definition; this.definition = definition;
ArrayList<String> values = new ArrayList<String>(); ArrayList<String> values = new ArrayList<String>();
if( value != null ) if( value != null )
values.add(value); values.add(value);
indices.put(index,values ); sites.put(site,values );
this.tags = tags; this.tags = tags;
} }
@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
ArgumentMatch otherArgumentMatch = (ArgumentMatch)other; ArgumentMatch otherArgumentMatch = (ArgumentMatch)other;
return this.definition.equals(otherArgumentMatch.definition) && return this.definition.equals(otherArgumentMatch.definition) &&
this.label.equals(otherArgumentMatch.label) && this.label.equals(otherArgumentMatch.label) &&
this.indices.equals(otherArgumentMatch.indices) && this.sites.equals(otherArgumentMatch.sites) &&
this.tags.equals(otherArgumentMatch.tags); this.tags.equals(otherArgumentMatch.tags);
} }
@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param key Key which specifies the transform. * @param key Key which specifies the transform.
* @return A variant of this ArgumentMatch with all keys transformed. * @return A variant of this ArgumentMatch with all keys transformed.
*/ */
@SuppressWarnings("unchecked")
ArgumentMatch transform(Multiplexer multiplexer, Object key) { ArgumentMatch transform(Multiplexer multiplexer, Object key) {
SortedMap<Integer,List<String>> newIndices = new TreeMap<Integer,List<String>>(); SortedMap<ArgumentMatchSite,List<String>> newIndices = new TreeMap<ArgumentMatchSite,List<String>>();
for(Map.Entry<Integer,List<String>> index: indices.entrySet()) { for(Map.Entry<ArgumentMatchSite,List<String>> site: sites.entrySet()) {
List<String> newEntries = new ArrayList<String>(); List<String> newEntries = new ArrayList<String>();
for(String entry: index.getValue()) for(String entry: site.getValue())
newEntries.add(multiplexer.transformArgument(key,entry)); newEntries.add(multiplexer.transformArgument(key,entry));
newIndices.put(index.getKey(),newEntries); newIndices.put(site.getKey(),newEntries);
} }
ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition); ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition);
newArgumentMatch.indices.putAll(newIndices); newArgumentMatch.sites.putAll(newIndices);
return newArgumentMatch; return newArgumentMatch;
} }
@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
public Iterator<ArgumentMatch> iterator() { public Iterator<ArgumentMatch> iterator() {
return new Iterator<ArgumentMatch>() { return new Iterator<ArgumentMatch>() {
/** /**
* Iterate over each the available index. * Iterate over each the available site.
*/ */
private Iterator<Integer> indexIterator = null; private Iterator<ArgumentMatchSite> siteIterator = null;
/** /**
* Iterate over each available token. * Iterate over each available token.
@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
private Iterator<String> tokenIterator = null; private Iterator<String> tokenIterator = null;
/** /**
* The next index to return. Null if none remain. * The next site to return. Null if none remain.
*/ */
Integer nextIndex = null; ArgumentMatchSite nextSite = null;
/** /**
* The next token to return. Null if none remain. * The next token to return. Null if none remain.
@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
String nextToken = null; String nextToken = null;
{ {
indexIterator = indices.keySet().iterator(); siteIterator = sites.keySet().iterator();
prepareNext(); prepareNext();
} }
@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return True if there's another token waiting in the wings. False otherwise. * @return True if there's another token waiting in the wings. False otherwise.
*/ */
public boolean hasNext() { public boolean hasNext() {
return nextToken != null; return nextToken != null;
} }
/** /**
@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return The next ArgumentMatch in the series. Should never be null. * @return The next ArgumentMatch in the series. Should never be null.
*/ */
public ArgumentMatch next() { public ArgumentMatch next() {
if( nextIndex == null || nextToken == null ) if( nextSite == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" ); throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags ); ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags );
prepareNext(); prepareNext();
return match; return match;
} }
/** /**
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available, * Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
* initialize nextIndex / nextToken to null. * initialize nextSite / nextToken to null.
*/ */
private void prepareNext() { private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) { if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next(); nextToken = tokenIterator.next();
} }
else { else {
nextIndex = null; nextSite = null;
nextToken = null; nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data. // Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data. // Otherwise, troll the outer iterator looking for more data.
while( indexIterator.hasNext() ) { while( siteIterator.hasNext() ) {
nextIndex = indexIterator.next(); nextSite = siteIterator.next();
if( indices.get(nextIndex) != null ) { if( sites.get(nextSite) != null ) {
tokenIterator = indices.get(nextIndex).iterator(); tokenIterator = sites.get(nextSite).iterator();
if( tokenIterator.hasNext() ) { if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next(); nextToken = tokenIterator.next();
break; break;
@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param other The other match to merge into. * @param other The other match to merge into.
*/ */
public void mergeInto( ArgumentMatch other ) { public void mergeInto( ArgumentMatch other ) {
indices.putAll(other.indices); sites.putAll(other.sites);
} }
/** /**
* Associate a value with this merge maapping. * Associate a value with this merge maapping.
* @param index index of the command-line argument to which this value is mated. * @param site site of the command-line argument to which this value is mated.
* @param value Text representation of value to add. * @param value Text representation of value to add.
*/ */
public void addValue( int index, String value ) { public void addValue( ArgumentMatchSite site, String value ) {
if( !indices.containsKey(index) || indices.get(index) == null ) if( !sites.containsKey(site) || sites.get(site) == null )
indices.put(index, new ArrayList<String>() ); sites.put(site, new ArrayList<String>() );
indices.get(index).add(value); sites.get(site).add(value);
} }
/** /**
* Does this argument already have a value at the given site? * Does this argument already have a value at the given site?
* Arguments are only allowed to be single-valued per site, and * Arguments are only allowed to be single-valued per site, and
* flags aren't allowed a value at all. * flags aren't allowed a value at all.
* @param index Index at which to check for values. * @param site Site at which to check for values.
* @return True if the argument has a value at the given site. False otherwise. * @return True if the argument has a value at the given site. False otherwise.
*/ */
public boolean hasValueAtSite( int index ) { public boolean hasValueAtSite( ArgumentMatchSite site ) {
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag(); return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag();
} }
/** /**
@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
*/ */
public List<String> values() { public List<String> values() {
List<String> values = new ArrayList<String>(); List<String> values = new ArrayList<String>();
for( int index: indices.keySet() ) { for( ArgumentMatchSite site: sites.keySet() ) {
if( indices.get(index) != null ) if( sites.get(site) != null )
values.addAll(indices.get(index)); values.addAll(sites.get(site));
} }
return values; return values;
} }

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Which source and the index within the source where an argument match was found.
 *
 * Instances are immutable. Ordering sorts first by source, then by index.
 */
public class ArgumentMatchSite implements Comparable<ArgumentMatchSite> {
    private final ArgumentMatchSource source;
    private final int index;

    /**
     * @param source Where the match came from (command line or file). May be null.
     * @param index Zero-based token index within the source.
     */
    public ArgumentMatchSite(ArgumentMatchSource source, int index) {
        this.source = source;
        this.index = index;
    }

    public ArgumentMatchSource getSource() {
        return source;
    }

    public int getIndex() {
        return index;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        ArgumentMatchSite that = (ArgumentMatchSite) o;
        return (index == that.index) && (source == null ? that.source == null : source.equals(that.source));
    }

    @Override
    public int hashCode() {
        int result = source != null ? source.hashCode() : 0;
        // Generated by intellij. No other special reason to this implementation. -ks
        result = 31 * result + index;
        return result;
    }

    @Override
    public int compareTo(ArgumentMatchSite that) {
        // Bug fix: equals()/hashCode() tolerate a null source, but compareTo()
        // previously dereferenced it unconditionally. Keep the ordering
        // consistent with equals by sorting null sources first.
        int comp;
        if (this.source == null)
            comp = (that.source == null) ? 0 : -1;
        else if (that.source == null)
            comp = 1;
        else
            comp = this.source.compareTo(that.source);
        if (comp != 0)
            return comp;

        // Same source: order by index. Explicit comparisons (rather than
        // subtraction) avoid integer-overflow surprises.
        if (this.index == that.index)
            return 0;
        return this.index < that.index ? -1 : 1;
    }
}

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.io.File;
/**
 * Where an argument match originated, via the commandline or a file.
 */
public class ArgumentMatchSource implements Comparable<ArgumentMatchSource> {
    /** The single shared source representing arguments typed on the command line. */
    public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null);

    private final ArgumentMatchSourceType type;
    private final File file;

    /**
     * Creates an argument match source from the specified file.
     * @param file File specifying the arguments. Must not be null.
     */
    public ArgumentMatchSource(File file) {
        this(ArgumentMatchSourceType.File, file);
    }

    private ArgumentMatchSource(ArgumentMatchSourceType type, File file) {
        if (type == ArgumentMatchSourceType.File && file == null)
            throw new IllegalArgumentException("An argument match source of type File cannot have a null file.");
        this.type = type;
        this.file = file;
    }

    public ArgumentMatchSourceType getType() {
        return type;
    }

    public File getFile() {
        return file;
    }

    @Override
    public boolean equals(Object o) {
        if (o == this)
            return true;
        if (o == null || o.getClass() != getClass())
            return false;

        ArgumentMatchSource other = (ArgumentMatchSource) o;
        if (type != other.type)
            return false;
        return file == null ? other.file == null : file.equals(other.file);
    }

    @Override
    public int hashCode() {
        // Same value as the conventional 31-based two-field hash.
        int typeHash = (type != null) ? type.hashCode() : 0;
        int fileHash = (file != null) ? file.hashCode() : 0;
        return 31 * typeHash + fileHash;
    }

    /**
     * Compares two sources, putting the command line first, then files.
     */
    @Override
    public int compareTo(ArgumentMatchSource that) {
        int byType = this.type.compareTo(that.type);
        if (byType != 0)
            return byType;

        // Types match; break the tie on the file, sorting a null file first.
        boolean mineMissing = (this.file == null);
        boolean theirsMissing = (that.file == null);
        if (mineMissing && theirsMissing)
            return 0;
        if (mineMissing)
            return -1;
        if (theirsMissing)
            return 1;
        return this.file.compareTo(that.file);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Type of where an argument match originated, via the commandline or a file.
 */
public enum ArgumentMatchSourceType {
    /** The argument was supplied directly on the command line. */
    CommandLine,

    /** The argument was read from an argument file. */
    File
}

View File

@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* Collection matches from argument definition to argument value. * Collection matches from argument definition to argument value.
* Package protected access is deliberate. * Package protected access is deliberate.
*/ */
Map<Integer,ArgumentMatch> argumentMatches = new TreeMap<Integer,ArgumentMatch>(); Map<ArgumentMatchSite,ArgumentMatch> argumentMatches = new TreeMap<ArgumentMatchSite,ArgumentMatch>();
/** /**
* Provide a place to put command-line argument values that don't seem to belong to * Provide a place to put command-line argument values that don't seem to belong to
@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @param site Site at which to check. * @param site Site at which to check.
* @return True if the site has a match. False otherwise. * @return True if the site has a match. False otherwise.
*/ */
boolean hasMatch( int site ) { boolean hasMatch( ArgumentMatchSite site ) {
return argumentMatches.containsKey( site ); return argumentMatches.containsKey( site );
} }
@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @return The match present at the given site. * @return The match present at the given site.
* @throws IllegalArgumentException if site does not contain a match. * @throws IllegalArgumentException if site does not contain a match.
*/ */
ArgumentMatch getMatch( int site ) { ArgumentMatch getMatch( ArgumentMatchSite site ) {
if( !argumentMatches.containsKey(site) ) if( !argumentMatches.containsKey(site) )
throw new IllegalArgumentException( "Site does not contain an argument: " + site ); throw new IllegalArgumentException( "Site does not contain an argument: " + site );
return argumentMatches.get(site); return argumentMatches.get(site);
@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
/** /**
* Return all argument matches of this source. * Return all argument matches of this source.
* @param parsingEngine Parsing engine.
* @param argumentSource Argument source to match. * @param argumentSource Argument source to match.
* @return List of all matches. * @return List of all matches.
*/ */
@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* TODO: Generify this. * TODO: Generify this.
* @param multiplexer Multiplexer that controls the transformation process. * @param multiplexer Multiplexer that controls the transformation process.
* @param key Key which specifies the transform. * @param key Key which specifies the transform.
* @return new argument matches.
*/ */
ArgumentMatches transform(Multiplexer multiplexer, Object key) { ArgumentMatches transform(Multiplexer multiplexer, Object key) {
ArgumentMatches newArgumentMatches = new ArgumentMatches(); ArgumentMatches newArgumentMatches = new ArgumentMatches();
@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
for( ArgumentMatch argumentMatch: getUniqueMatches() ) { for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) { if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
argumentMatch.mergeInto( match ); argumentMatch.mergeInto( match );
for( int index: match.indices.keySet() ) for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( index, argumentMatch ); argumentMatches.put( site, argumentMatch );
definitionExists = true; definitionExists = true;
} }
} }
if( !definitionExists ) { if( !definitionExists ) {
for( int index: match.indices.keySet() ) for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( index, match ); argumentMatches.put( site, match );
} }
} }

View File

@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) { public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return parse(parsingEngine, source, type, matches, false);
}
/**
* The actual argument parsing method.
*
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
* continue to input them out of order (whereas Tribble Features are ordered).
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source); ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches ); String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type); Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( tags.getPositionalTags().size() > 2 ) { if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException( throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " + String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only suport -X:type and -X:name,type argument styles", "Rod bindings only support -X:type and -X:name,type argument styles",
value, source.field.getName())); value, source.field.getName()));
} if ( tags.getPositionalTags().size() == 2 ) { } if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style // -X:name,type style
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
} }
if ( tribbleType == null ) if ( tribbleType == null ) {
// IntervalBindings allow streaming conversion of Strings
if ( createIntervalBinding ) {
return new IntervalBinding(value);
}
if ( ! file.exists() ) { if ( ! file.exists() ) {
throw new UserException.CouldNotReadInputFile(file, "file does not exist"); throw new UserException.CouldNotReadInputFile(file, "file does not exist");
} else if ( ! file.canRead() || ! file.isFile() ) { } else if ( ! file.canRead() || ! file.isFile() ) {
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s", "Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType))); manager.userFriendlyListOfAvailableFeatures(parameterType)));
} }
}
} }
} }
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class); Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags); Object result;
parsingEngine.addTags(result,tags); if ( createIntervalBinding ) {
parsingEngine.addRodBinding(result); result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
} else {
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
result = rbind;
}
return result; return result;
} catch (InvocationTargetException e) { } catch (InvocationTargetException e) {
throw new UserException.CommandLineException( throw new UserException.CommandLineException(
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
} }
} }
/**
* Parser for RodBinding objects
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want IntervalBinding class objects
* @param type The type to check.
* @return true if the provided class is an IntervalBinding.class
*/
@Override
public boolean supports( Class type ) {
return isIntervalBinding(type);
}
public static boolean isIntervalBinding( Class type ) {
return IntervalBinding.class.isAssignableFrom(type);
}
/**
* See note from RodBindingArgumentTypeDescriptor.parse().
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @return the IntervalBinding object.
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
}
}
/** /**
* Parse simple argument types: java primitives, wrapper classes, and anything that has * Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor. * a simple String constructor.
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor { class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override @Override
public boolean supports( Class type ) { public boolean supports( Class type ) {
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false; if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
if ( type.isPrimitive() ) return true; if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true; if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true; if ( primitiveToWrapperMap.containsValue(type) ) return true;

View File

@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter; import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.*;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Locale;
public abstract class CommandLineProgram { public abstract class CommandLineProgram {
@ -155,6 +152,7 @@ public abstract class CommandLineProgram {
* *
* @param clp the command line program to execute * @param clp the command line program to execute
* @param args the command line arguments passed in * @param args the command line arguments passed in
* @param dryRun dry run
* @throws Exception when an exception occurs * @throws Exception when an exception occurs
*/ */
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
@ -176,6 +174,8 @@ public abstract class CommandLineProgram {
ParsingEngine parser = clp.parser = new ParsingEngine(clp); ParsingEngine parser = clp.parser = new ParsingEngine(clp);
parser.addArgumentSource(clp.getClass()); parser.addArgumentSource(clp.getClass());
Map<ArgumentMatchSource, List<String>> parsedArgs;
// process the args // process the args
if (clp.canAddArgumentsDynamically()) { if (clp.canAddArgumentsDynamically()) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments. // if the command-line program can toss in extra args, fetch them and reparse the arguments.
@ -196,14 +196,14 @@ public abstract class CommandLineProgram {
Class[] argumentSources = clp.getArgumentSources(); Class[] argumentSources = clp.getArgumentSources();
for (Class argumentSource : argumentSources) for (Class argumentSource : argumentSources)
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource); parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parser.parse(args); parsedArgs = parser.parse(args);
if (isHelpPresent(parser)) if (isHelpPresent(parser))
printHelpAndExit(clp, parser); printHelpAndExit(clp, parser);
if ( ! dryRun ) parser.validate(); if ( ! dryRun ) parser.validate();
} else { } else {
parser.parse(args); parsedArgs = parser.parse(args);
if ( ! dryRun ) { if ( ! dryRun ) {
if (isHelpPresent(parser)) if (isHelpPresent(parser))
@ -230,7 +230,7 @@ public abstract class CommandLineProgram {
} }
// regardless of what happens next, generate the header information // regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args); HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs);
// call the execute // call the execute
CommandLineProgram.result = clp.execute(); CommandLineProgram.result = clp.execute();

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
/**
 * An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
 *
 * The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
 * the engine to construct intervals for traversal at runtime. The IntervalBinding can
 * either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
 * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
 *
 * Note that this class is immutable.
 */
public final class IntervalBinding<T extends Feature> {
    // Exactly one of the two fields below is non-null, selected by which
    // constructor ran. Both are final to back the immutability claim above.
    private final RodBinding<T> featureIntervals;
    private final String stringIntervals;

    @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
    public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
        featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
        stringIntervals = null;
    }

    @Requires({"intervalArgument != null"})
    public IntervalBinding(String intervalArgument) {
        featureIntervals = null;
        stringIntervals = intervalArgument;
    }

    /**
     * @return The raw source of the intervals: the bound feature file's source
     *         when this is a ROD binding, otherwise the interval string itself.
     */
    public String getSource() {
        if ( featureIntervals != null )
            return featureIntervals.getSource();
        return stringIntervals;
    }

    /**
     * Resolves this binding into concrete genome locations.
     *
     * @param toolkit Engine supplying the genome-loc parser used to build locations.
     * @return Intervals decoded from the bound feature file, or parsed from the
     *         interval string argument(s).
     * @throws UserException if the interval file cannot be read.
     */
    public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
        List<GenomeLoc> intervals;

        if ( featureIntervals != null ) {
            intervals = new ArrayList<GenomeLoc>();

            // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
            FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
            if ( codec instanceof ReferenceDependentFeatureCodec )
                ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());

            FileInputStream fis = null;
            try {
                fis = new FileInputStream(new File(featureIntervals.getSource()));
                AsciiLineReader lineReader = new AsciiLineReader(fis);
                codec.readHeader(lineReader);

                String line = lineReader.readLine();
                while ( line != null ) {
                    intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(codec.decodeLoc(line)));
                    line = lineReader.readLine();
                }
            } catch (IOException e) {
                throw new UserException("Problem reading the interval file " + featureIntervals.getSource() + "; " + e.getMessage());
            } finally {
                // Bug fix: the stream was previously never closed, leaking a
                // file descriptor on every call (including the error path).
                if ( fis != null ) {
                    try {
                        fis.close();
                    } catch (IOException e) {
                        // Best-effort close; the intervals were already read.
                    }
                }
            }
        } else {
            intervals = IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
        }

        return intervals;
    }
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline; package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires; import com.google.java.contract.Requires;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils; import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails; import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter; import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field; import java.lang.reflect.Field;
import java.util.*; import java.util.*;
@ -75,6 +78,7 @@ public class ParsingEngine {
* The type of set used must be ordered (but not necessarily sorted). * The type of set used must be ordered (but not necessarily sorted).
*/ */
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(), private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new IntervalBindingArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(), new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(), new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) ); new MultiplexArgumentTypeDescriptor()) );
@ -100,6 +104,8 @@ public class ParsingEngine {
if(clp != null) if(clp != null)
argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors()); argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors());
argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS); argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS);
addArgumentSource(ParsingEngineArgumentFiles.class);
} }
/** /**
@ -148,21 +154,43 @@ public class ParsingEngine {
* command-line arguments to the arguments that are actually * command-line arguments to the arguments that are actually
* required. * required.
* @param tokens Tokens passed on the command line. * @param tokens Tokens passed on the command line.
* @return The parsed arguments by file.
*/ */
public void parse( String[] tokens ) { public SortedMap<ArgumentMatchSource, List<String>> parse( String[] tokens ) {
argumentMatches = new ArgumentMatches(); argumentMatches = new ArgumentMatches();
SortedMap<ArgumentMatchSource, List<String>> parsedArgs = new TreeMap<ArgumentMatchSource, List<String>>();
int lastArgumentMatchSite = -1; List<String> cmdLineTokens = Arrays.asList(tokens);
parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs);
for( int i = 0; i < tokens.length; i++ ) { ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles();
String token = tokens[i];
// Load the arguments ONLY into the argument files.
// Validation may optionally run on the rest of the arguments.
loadArgumentsIntoObject(argumentFiles);
for (File file: argumentFiles.files) {
List<String> fileTokens = getArguments(file);
parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs);
}
return parsedArgs;
}
private void parse(ArgumentMatchSource matchSource, List<String> tokens,
ArgumentMatches argumentMatches, SortedMap<ArgumentMatchSource, List<String>> parsedArgs) {
ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1);
int i = 0;
for (String token: tokens) {
// If the token is of argument form, parse it into its own argument match. // If the token is of argument form, parse it into its own argument match.
// Otherwise, pair it with the most recently used argument discovered. // Otherwise, pair it with the most recently used argument discovered.
ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i);
if( isArgumentForm(token) ) { if( isArgumentForm(token) ) {
ArgumentMatch argumentMatch = parseArgument( token, i ); ArgumentMatch argumentMatch = parseArgument( token, site );
if( argumentMatch != null ) { if( argumentMatch != null ) {
argumentMatches.mergeInto( argumentMatch ); argumentMatches.mergeInto( argumentMatch );
lastArgumentMatchSite = i; lastArgumentMatchSite = site;
} }
} }
else { else {
@ -170,10 +198,31 @@ public class ParsingEngine {
!argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite)) !argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite))
argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token ); argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token );
else else
argumentMatches.MissingArgument.addValue( i, token ); argumentMatches.MissingArgument.addValue( site, token );
} }
i++;
} }
parsedArgs.put(matchSource, tokens);
}
private List<String> getArguments(File file) {
try {
if (file.getAbsolutePath().endsWith(".list")) {
return getListArguments(file);
}
} catch (IOException e) {
throw new UserException.CouldNotReadInputFile(file, e);
}
throw new UserException.CouldNotReadInputFile(file, "file extension is not .list");
}
private List<String> getListArguments(File file) throws IOException {
ArrayList<String> argsList = new ArrayList<String>();
for (String line: FileUtils.readLines(file))
argsList.addAll(Arrays.asList(Utils.escapeExpressions(line)));
return argsList;
} }
public enum ValidationType { MissingRequiredArgument, public enum ValidationType { MissingRequiredArgument,
@ -494,7 +543,7 @@ public class ParsingEngine {
* @param position The position of the token in question. * @param position The position of the token in question.
* @return ArgumentMatch associated with this token, or null if no match exists. * @return ArgumentMatch associated with this token, or null if no match exists.
*/ */
private ArgumentMatch parseArgument( String token, int position ) { private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) {
if( !isArgumentForm(token) ) if( !isArgumentForm(token) )
throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token ); throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
@ -579,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException {
private static String formatArguments( ArgumentMatch invalidValues ) { private static String formatArguments( ArgumentMatch invalidValues ) {
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
for( int index: invalidValues.indices.keySet() ) for( ArgumentMatchSite site: invalidValues.sites.keySet() )
for( String value: invalidValues.indices.get(index) ) { for( String value: invalidValues.sites.get(site) ) {
sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) ); switch (site.getSource().getType()) {
case CommandLine:
sb.append( String.format("%nInvalid argument value '%s' at position %d.",
value, site.getIndex()) );
break;
case File:
sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.",
value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) );
break;
default:
throw new RuntimeException( String.format("Unexpected argument match source type: %s",
site.getSource().getType()));
}
if(value != null && Utils.dupString(' ',value.length()).equals(value)) if(value != null && Utils.dupString(' ',value.length()).equals(value))
sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace."); sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace.");
} }
@ -634,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException {
private static String formatArguments(ArgumentDefinition definition, String argumentPassed) { private static String formatArguments(ArgumentDefinition definition, String argumentPassed) {
return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions)); return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions));
} }
} }
/**
* Container class to store the list of argument files.
* The files will be parsed after the command line arguments.
*/
class ParsingEngineArgumentFiles {
@Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false)
public List<File> files = new ArrayList<File>();
}

View File

@ -68,7 +68,7 @@ public abstract class ParsingMethod {
* @return An argument match. Definition field will be populated if a match was found or * @return An argument match. Definition field will be populated if a match was found or
* empty if no appropriate definition could be found. * empty if no appropriate definition could be found.
*/ */
public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) { public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) {
// If the argument is valid, parse out the argument. // If the argument is valid, parse out the argument.
Matcher matcher = pattern.matcher(token); Matcher matcher = pattern.matcher(token);
@ -102,9 +102,7 @@ public abstract class ParsingMethod {
// Try to find a matching argument. If found, label that as the match. If not found, add the argument // Try to find a matching argument. If found, label that as the match. If not found, add the argument
// with a null definition. // with a null definition.
ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags); return new ArgumentMatch(argument,argumentDefinition,position,tags);
return argumentMatch;
} }
/** /**

View File

@ -28,34 +28,30 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*; import net.sf.samtools.*;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*; import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection; import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion; import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*; import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource; import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource; import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample; import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.executive.MicroScheduler; import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager; import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter; import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter; import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub; import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder; import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*; import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils; import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File; import java.io.File;
import java.util.*; import java.util.*;
@ -92,7 +88,7 @@ public class GenomeAnalysisEngine {
/** /**
* Accessor for sample metadata * Accessor for sample metadata
*/ */
private SampleDataSource sampleDataSource = null; private SampleDB sampleDB = null;
/** /**
* Accessor for sharded reference-ordered data. * Accessor for sharded reference-ordered data.
@ -206,6 +202,9 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal. // Prepare the data for traversal.
initializeDataSources(); initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list // initialize and validate the interval list
initializeIntervals(); initializeIntervals();
validateSuppliedIntervals(); validateSuppliedIntervals();
@ -222,12 +221,12 @@ public class GenomeAnalysisEngine {
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals); ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
// execute the microscheduler, storing the results // execute the microscheduler, storing the results
Object result = microScheduler.execute(this.walker, shardStrategy); return microScheduler.execute(this.walker, shardStrategy);
//monitor.stop(); //monitor.stop();
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed())); //logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
return result; //return result;
} }
/** /**
@ -259,13 +258,12 @@ public class GenomeAnalysisEngine {
* @return A collection of available filters. * @return A collection of available filters.
*/ */
public Collection<ReadFilter> createFilters() { public Collection<ReadFilter> createFilters() {
Set<ReadFilter> filters = new HashSet<ReadFilter>(); final List<ReadFilter> filters = WalkerManager.getReadFilters(walker,this.getFilterManager());
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0) if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList)); filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
for(String filterName: this.getArguments().readFilters) for(final String filterName: this.getArguments().readFilters)
filters.add(this.getFilterManager().createByName(filterName)); filters.add(this.getFilterManager().createByName(filterName));
return Collections.unmodifiableSet(filters); return Collections.unmodifiableList(filters);
} }
/** /**
@ -299,10 +297,14 @@ public class GenomeAnalysisEngine {
else if(WalkerManager.getDownsamplingMethod(walker) != null) else if(WalkerManager.getDownsamplingMethod(walker) != null)
method = WalkerManager.getDownsamplingMethod(walker); method = WalkerManager.getDownsamplingMethod(walker);
else else
method = argCollection.getDefaultDownsamplingMethod(); method = GATKArgumentCollection.getDefaultDownsamplingMethod();
return method; return method;
} }
protected void setDownsamplingMethod(DownsamplingMethod method) {
argCollection.setDownsamplingMethod(method);
}
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); } public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); } public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
@ -381,18 +383,18 @@ public class GenomeAnalysisEngine {
// If intervals is non-null and empty at this point, it means that the list of intervals to process // If intervals is non-null and empty at this point, it means that the list of intervals to process
// was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since // was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since
// this was very likely unintentional, the user should be informed of this. Note that this is different // this was very likely unintentional, the user should be informed of this. Note that this is different
// from the case where intervals == null, which indicates either that there were no interval arguments, // from the case where intervals == null, which indicates that there were no interval arguments.
// or that -L all was specified.
if ( intervals != null && intervals.isEmpty() ) { if ( intervals != null && intervals.isEmpty() ) {
throw new ArgumentException("The given combination of -L and -XL options results in an empty set. " + logger.warn("The given combination of -L and -XL options results in an empty set. No intervals to process.");
"No intervals to process.");
} }
} }
/** /**
* Get the sharding strategy given a driving data source. * Get the sharding strategy given a driving data source.
* *
* @param readsDataSource readsDataSource
* @param drivingDataSource Data on which to shard. * @param drivingDataSource Data on which to shard.
* @param intervals intervals
* @return the sharding strategy * @return the sharding strategy
*/ */
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) { protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
@ -429,7 +431,7 @@ public class GenomeAnalysisEngine {
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region); return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
} }
ShardStrategy shardStrategy = null; ShardStrategy shardStrategy;
ShardStrategyFactory.SHATTER_STRATEGY shardType; ShardStrategyFactory.SHATTER_STRATEGY shardType;
long SHARD_SIZE = 100000L; long SHARD_SIZE = 100000L;
@ -438,6 +440,8 @@ public class GenomeAnalysisEngine {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000; if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) { if (intervals != null && !intervals.isEmpty()) {
if (readsDataSource == null)
throw new IllegalArgumentException("readsDataSource is null");
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate) if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately."); throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
@ -501,7 +505,8 @@ public class GenomeAnalysisEngine {
*/ */
private void initializeTempDirectory() { private void initializeTempDirectory() {
File tempDir = new File(System.getProperty("java.io.tmpdir")); File tempDir = new File(System.getProperty("java.io.tmpdir"));
tempDir.mkdirs(); if (!tempDir.exists() && !tempDir.mkdirs())
throw new UserException.BadTmpDir("Unable to create directory");
} }
/** /**
@ -566,34 +571,23 @@ public class GenomeAnalysisEngine {
protected void initializeIntervals() { protected void initializeIntervals() {
// return if no interval arguments at all // return if no interval arguments at all
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null)) if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return; return;
// if '-L all' was specified, verify that it was the only -L specified and return if so. // Note that the use of '-L all' is no longer supported.
if(argCollection.intervals != null) {
for(String interval: argCollection.intervals) {
if(interval.trim().equals("all")) {
if(argCollection.intervals.size() > 1)
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
// '-L all' was specified and seems valid. Return.
return;
}
}
}
// if include argument isn't given, create new set of all possible intervals // if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ? GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) : GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging))); loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
// if no exclude arguments, can return parseIntervalArguments directly // if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null) if ( argCollection.excludeIntervals == null )
intervals = includeSortedSet; intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets // otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else { else {
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null); GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet); intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given // logging messages only printed when exclude (-XL) arguments are given
@ -608,47 +602,26 @@ public class GenomeAnalysisEngine {
/** /**
* Loads the intervals relevant to the current execution * Loads the intervals relevant to the current execution
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools * @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
* notation, or a combination of the above * @param rule interval merging rule
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
* @return A sorted, merged list of all intervals specified in this arg list. * @return A sorted, merged list of all intervals specified in this arg list.
*/ */
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) { protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST || List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
argCollection.unsafe == ValidationExclusion.TYPE.ALL); for ( IntervalBinding intervalBinding : argList ) {
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList); if ( intervals.isEmpty() ) {
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule); logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
}
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
}
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging); return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
} }
/**
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
* @return ROD intervals as GenomeLocs
*/
private List<GenomeLoc> getRODIntervals() {
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
if (rodNames != null && argCollection.RODToInterval != null) {
String rodName = argCollection.RODToInterval;
// check to make sure we have a rod of that name
if (!rodNames.containsKey(rodName))
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
for (String str : rodNames.keySet())
if (str.equals(rodName)) {
logger.info("Adding interval list from track (ROD) named " + rodName);
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
ret.addAll(intervalGenerator.toGenomeLocList());
}
}
return ret;
}
/** /**
* Add additional, externally managed IO streams for inputs. * Add additional, externally managed IO streams for inputs.
* *
@ -692,12 +665,22 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters) for (ReadFilter filter : filters)
filter.initialize(this); filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference // set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe); rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
} }
/**
* Entry-point function to initialize the samples database from input data and pedigree arguments
*/
private void initializeSampleDB() {
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
sampleDB = sampleDBBuilder.getFinalSampleDB();
}
/** /**
* Gets a unique identifier for the reader sourcing this read. * Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine. * @param read Read to examine.
@ -716,106 +699,13 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id); return getReadsDataSource().getSAMFile(id);
} }
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> samples = new HashSet<String>(1);
sample_sets.add(samples);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
samples.add(g.getSample());
}
}
return sample_sets;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> libs = new HashSet<String>(2);
lib_sets.add(libs);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
libs.add(g.getLibrary());
}
}
return lib_sets;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> groups = new HashSet<String>(5);
rg_sets.add(groups);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
// use HeaderMerger to translate original read group id from the reader into the read group id in the
// merged stream, and save that remapped read group id to associate it with specific reader
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
groups.add(g.getReadGroupId());
}
}
}
return rg_sets;
}
/** /**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available). * Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
* *
* @param reads Reads data source. * @param reads Reads data source.
* @param reference Reference data source. * @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks * @param rods a collection of the reference ordered data tracks
* @param manager manager
*/ */
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) { private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null ) if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
@ -844,15 +734,22 @@ public class GenomeAnalysisEngine {
/** /**
* Gets a data source for the given set of reads. * Gets a data source for the given set of reads.
* *
* @param argCollection arguments
* @param genomeLocParser parser
* @param refReader reader
* @return A data source for the given set of reads. * @return A data source for the given set of reads.
*/ */
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) { private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod(); DownsamplingMethod method = getDownsamplingMethod();
// Synchronize the method back into the collection so that it shows up when
// interrogating for the downsample method during command line recreation.
setDownsamplingMethod(method);
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF) if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested."); throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
SAMDataSource dataSource = new SAMDataSource( return new SAMDataSource(
samReaderIDs, samReaderIDs,
genomeLocParser, genomeLocParser,
argCollection.useOriginalBaseQualities, argCollection.useOriginalBaseQualities,
@ -868,14 +765,12 @@ public class GenomeAnalysisEngine {
refReader, refReader,
argCollection.defaultBaseQualities, argCollection.defaultBaseQualities,
!argCollection.disableLowMemorySharding); !argCollection.disableLowMemorySharding);
return dataSource;
} }
/** /**
* Opens a reference sequence file paired with an index. Only public for testing purposes * Opens a reference sequence file paired with an index. Only public for testing purposes
* *
* @param refFile Handle to a reference sequence file. Non-null. * @param refFile Handle to a reference sequence file. Non-null.
* @return A thread-safe file wrapper.
*/ */
public void setReferenceDataSource(File refFile) { public void setReferenceDataSource(File refFile) {
this.referenceDataSource = new ReferenceDataSource(refFile); this.referenceDataSource = new ReferenceDataSource(refFile);
@ -929,6 +824,26 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader); return readsDataSource.getHeader(reader);
} }
/**
* Returns an ordered list of the unmerged SAM file headers known to this engine.
* @return list of header for each input SAM file, in command line order
*/
public List<SAMFileHeader> getSAMFileHeaders() {
final List<SAMFileHeader> headers = new ArrayList<SAMFileHeader>();
for ( final SAMReaderID id : getReadsDataSource().getReaderIDs() ) {
headers.add(getReadsDataSource().getHeader(id));
}
return headers;
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return getReferenceDataSource().getReference().getSequenceDictionary();
}
/** /**
* Returns data source object encapsulating all essential info and handlers used to traverse * Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object. * reads; header merger, individual file readers etc can be accessed through the returned data source object.
@ -939,8 +854,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource; return this.readsDataSource;
} }
/** /**
* Sets the collection of GATK main application arguments. * Sets the collection of GATK main application arguments.
* *
@ -1027,140 +940,14 @@ public class GenomeAnalysisEngine {
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics(); return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
} }
public SampleDataSource getSampleMetadata() { // -------------------------------------------------------------------------------------
return this.sampleDataSource; //
} // code for working with Samples database
//
// -------------------------------------------------------------------------------------
/** public SampleDB getSampleDB() {
* Get a sample by its ID return this.sampleDB;
* If an alias is passed in, return the main sample object
* @param id sample id
* @return sample Object with this ID
*/
public Sample getSampleById(String id) {
return sampleDataSource.getSampleById(id);
}
/**
* Get the sample for a given read group
* Must first look up ID for read group
* @param readGroup of sample
* @return sample object with ID from the read group
*/
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
return sampleDataSource.getSampleByReadGroup(readGroup);
}
/**
* Get a sample for a given read
* Must first look up read group, and then sample ID for that read group
* @param read of sample
* @return sample object of this read
*/
public Sample getSampleByRead(SAMRecord read) {
return getSampleByReadGroup(read.getReadGroup());
}
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
return sampleDataSource.sampleCount();
}
/**
* Return all samples with a given family ID
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
* @param familyId family ID
* @return Samples with the given family ID
*/
public Set<Sample> getFamily(String familyId) {
return sampleDataSource.getFamily(familyId);
}
/**
* Returns all children of a given sample
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
* @param sample parent sample
* @return children of the given sample
*/
public Set<Sample> getChildren(Sample sample) {
return sampleDataSource.getChildren(sample);
}
/**
* Gets all the samples
* @return
*/
public Collection<Sample> getSamples() {
return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*
* @param sampleNameList List of sample names
* @return Corresponding set of samples
*/
public Set<Sample> getSamples(Collection<String> sampleNameList) {
return sampleDataSource.getSamples(sampleNameList);
}
/**
* Returns a set of samples that have any value (which could be null) for a given property
* @param key Property key
* @return Set of samples with the property
*/
public Set<Sample> getSamplesWithProperty(String key) {
return sampleDataSource.getSamplesWithProperty(key);
}
/**
* Returns a set of samples that have a property with a certain value
* Value must be a string for now - could add a similar method for matching any objects in the future
*
* @param key Property key
* @param value String property value
* @return Set of samples that match key and value
*/
public Set<Sample> getSamplesWithProperty(String key, String value) {
return sampleDataSource.getSamplesWithProperty(key, value);
}
/**
* Returns a set of sample objects for the sample names in a variant context
*
* @param context Any variant context
* @return a set of the sample objects
*/
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
Set<Sample> samples = new HashSet<Sample>();
for (String sampleName : context.getSampleNames()) {
samples.add(sampleDataSource.getOrCreateSample(sampleName));
}
return samples;
}
/**
* Returns all samples that were referenced in the SAM file
*/
public Set<Sample> getSAMFileSamples() {
return sampleDataSource.getSAMFileSamples();
}
/**
* Return a subcontext restricted to samples with a given property key/value
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
* @param context VariantContext to filter
* @param key property key
* @param value property value (must be string)
* @return subcontext
*/
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
return sampleDataSource.subContextFromSampleProperty(context, key, value);
} }
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) { public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.TreeMap;
/** /**
* Holds a bunch of basic information about the traversal. * Holds a bunch of basic information about the traversal.
@ -102,8 +103,12 @@ public class ReadMetrics implements Cloneable {
counter.put(filter.getClass(), c + 1L); counter.put(filter.getClass(), c + 1L);
} }
public Map<Class,Long> getCountsByFilter() { public Map<String,Long> getCountsByFilter() {
return Collections.unmodifiableMap(counter); final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
}
return sortedCounts;
} }
/** /**

View File

@ -29,13 +29,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/** /**
* @author ebanks * @author ebanks
* @version 1.0 * @version 1.0
*/ */
@Root
public class DbsnpArgumentCollection { public class DbsnpArgumentCollection {
/** /**

View File

@ -26,34 +26,26 @@
package org.broadinstitute.sting.gatk.arguments; package org.broadinstitute.sting.gatk.arguments;
import net.sf.samtools.SAMFileReader; import net.sf.samtools.SAMFileReader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden; import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.DownsampleType; import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport; import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule; import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule; import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.Persister;
import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle;
import java.io.File; import java.io.File;
import java.io.InputStream; import java.util.*;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/** /**
* @author aaron * @author aaron
* @version 1.0 * @version 1.0
*/ */
@Root
public class GATKArgumentCollection { public class GATKArgumentCollection {
/* our version number */ /* our version number */
@ -64,58 +56,58 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() { public GATKArgumentCollection() {
} }
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required = false)
public Map<String, String> walkerArgs = new HashMap<String, String>(); public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults // parameters and their defaults
@ElementList(required = false)
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false) @Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>(); public List<String> samFiles = new ArrayList<String>();
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
public List<File> sampleFiles = new ArrayList<File>();
@Element(required = false)
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false) @Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null; public Integer readBufferSize = null;
@Element(required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false) @Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD; public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@ElementList(required = false) @Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
public List<String> readFilters = new ArrayList<String>(); public List<String> readFilters = new ArrayList<String>();
@ElementList(required = false) /**
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false) * Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
public List<String> intervals = null; * One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
*/
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> intervals = null;
@ElementList(required = false) /**
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false) * Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
public List<String> excludeIntervals = null; * One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
*/
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> excludeIntervals = null;
/**
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
*/
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
/**
* Should abutting (but not overlapping) intervals be treated as separate intervals?
*/
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@Element(required = false)
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false) @Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null; public File referenceFile = null;
@Deprecated @Deprecated
@Hidden @Hidden
@ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false) @Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>(); public ArrayList<String> RODBindings = new ArrayList<String>();
@Element(required = false)
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
public String RODToInterval = null;
@Element(required = false)
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false) @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false; public boolean nonDeterministicRandomSeed = false;
@ -128,22 +120,19 @@ public class GATKArgumentCollection {
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE; private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000; private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
@Element(required = false) @Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here", required = false)
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
public DownsampleType downsamplingType = null; public DownsampleType downsamplingType = null;
@Element(required = false)
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false) @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
public Double downsampleFraction = null; public Double downsampleFraction = null;
@Element(required = false)
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false) @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
public Integer downsampleCoverage = null; public Integer downsampleCoverage = null;
/** /**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify * Gets the downsampling method explicitly specified by the user. If the user didn't specify
* a default downsampling mechanism, return null. * a default downsampling mechanism, return the default.
* @return The explicitly specified downsampling mechanism, or null if none exists. * @return The explicitly specified downsampling mechanism, or the default if none exists.
*/ */
public DownsamplingMethod getDownsamplingMethod() { public DownsamplingMethod getDownsamplingMethod() {
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null) if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
@ -153,16 +142,26 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction); return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
} }
/**
* Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments.
* @param method The downsampling mechanism.
*/
public void setDownsamplingMethod(DownsamplingMethod method) {
if (method == null)
throw new IllegalArgumentException("method is null");
downsamplingType = method.type;
downsampleCoverage = method.toCoverage;
downsampleFraction = method.toFraction;
}
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// BAQ arguments // BAQ arguments
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false) @Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF; public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
@Element(required = false)
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false) @Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP; public double BAQGOP = BAQ.DEFAULT_GOP;
@ -171,7 +170,6 @@ public class GATKArgumentCollection {
// performance log arguments // performance log arguments
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false) @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
public File performanceLog = null; public File performanceLog = null;
@ -184,67 +182,117 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null); return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
} }
@Element(required = false)
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false) @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false; public Boolean useOriginalBaseQualities = false;
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false) @Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
public byte defaultBaseQualities = -1; public byte defaultBaseQualities = -1;
@Element(required = false)
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false) @Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT; public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Element(required = false)
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false) @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe; public ValidationExclusion.TYPE unsafe;
/** How many threads should be allocated to this analysis. */ @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis", required = false)
@Element(required = false)
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
public int numberOfThreads = 1; public int numberOfThreads = 1;
/** What rule should we use when merging intervals */ @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line", required = false)
@Element(required = false)
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@ElementList(required = false)
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null; public List<String> readGroupBlackList = null;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
// //
// distributed GATK arguments // PED (pedigree) support
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@Element(required=false)
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
@Hidden
public File processingTrackerFile = null;
@Element(required=false) /**
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false) * <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
@Hidden * processed in the GATK.</p>
public boolean restartProcessingTracker = false; *
* <ul>
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
* </ul>
*
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
*
* <ul>
* <li>Family ID</li>
* <li>Individual ID</li>
* <li>Paternal ID</li>
* <li>Maternal ID</li>
* <li>Sex (1=male; 2=female; other=unknown)</li>
* <li>Phenotype</li>
* </ul>
*
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: GATK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
*
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
*
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.</p>
*
* <p>Affection status should be coded:</p>
*
* <ul>
* <li>-9 missing</li>
* <li>0 missing</li>
* <li>1 unaffected</li>
* <li>2 affected</li>
* </ul>
*
* <p>If any value outside of -9,0,1,2 is detected than the samples are assumed
* to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
* represents the missing value.</p>
*
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
*
* <p>For example, here are two individuals (one row = one person):</p>
*
* <pre>
* FAM001 1 0 0 1 2
* FAM001 2 0 0 1 2
* </pre>
*
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
*
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
* data should clearly indicate so in their arguments and will throw errors if required pedigree
* information is missing.</p>
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List<File> pedigreeFiles = Collections.emptyList();
@Element(required=false) /**
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false) * Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
@Hidden * valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
public File processingTrackerStatusFile = null; * as -ped supports
*/
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
public List<String> pedigreeStrings = Collections.emptyList();
@Element(required=false) /**
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false) * How strict should we be in parsing the PED files?
@Hidden */
public int processTrackerID = -1; @Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
//
// BAM indexing and sharding arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false) @Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
@Hidden @Hidden
public boolean allowIntervalsWithUnindexedBAM = false; public boolean allowIntervalsWithUnindexedBAM = false;
@Element(required = false) @Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality",required=false)
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality.",required=false)
public boolean disableLowMemorySharding = false; public boolean disableLowMemorySharding = false;
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
@ -253,69 +301,6 @@ public class GATKArgumentCollection {
// //
// -------------------------------------------------------------------------------------------------------------- // --------------------------------------------------------------------------------------------------------------
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the file to write to
*/
public static void marshal(GATKArgumentCollection collection, String outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File result = new File(outputFile);
try {
serializer.write(collection, result);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the stream to write to
*/
public static void marshal(GATKArgumentCollection collection, PrintStream outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
serializer.write(collection, outputFile);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param filename the filename to marshal from
*/
public static GATKArgumentCollection unmarshal(String filename) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File source = new File(filename);
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + filename, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param file the inputstream to marshal from
*/
public static GATKArgumentCollection unmarshal(InputStream file) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, file);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + file.toString(), e);
}
}
/** /**
* test equality between two arg collections. This function defines the statement: * test equality between two arg collections. This function defines the statement:
* "not fun to write" * "not fun to write"
@ -363,7 +348,7 @@ public class GATKArgumentCollection {
if (!other.referenceFile.equals(this.referenceFile)) { if (!other.referenceFile.equals(this.referenceFile)) {
return false; return false;
} }
if (!other.intervals.equals(this.intervals)) { if ((other.intervals == null && this.intervals != null) || !other.intervals.equals(this.intervals)) {
return false; return false;
} }
if (!other.excludeIntervals.equals(this.excludeIntervals)) { if (!other.excludeIntervals.equals(this.excludeIntervals)) {
@ -386,39 +371,21 @@ public class GATKArgumentCollection {
if (other.intervalMerging != this.intervalMerging) { if (other.intervalMerging != this.intervalMerging) {
return false; return false;
} }
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
if (other.phoneHomeType != this.phoneHomeType) { if (other.phoneHomeType != this.phoneHomeType) {
return false; return false;
} }
if (BTIMergeRule != other.BTIMergeRule) if (intervalSetRule != other.intervalSetRule)
return false; return false;
if ( BAQMode != other.BAQMode) return false; if ( BAQMode != other.BAQMode ) return false;
if ( BAQGOP != other.BAQGOP ) return false; if ( BAQGOP != other.BAQGOP ) return false;
if ((other.performanceLog == null && this.performanceLog != null) || if ((other.performanceLog == null && this.performanceLog != null) ||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog))) (other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false; return false;
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
return false;
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
return false;
if ( restartProcessingTracker != other.restartProcessingTracker )
return false;
if ( processTrackerID != other.processTrackerID )
return false;
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM) if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false; return false;

View File

@ -28,13 +28,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input; import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding; import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext; import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/** /**
* @author ebanks * @author ebanks
* @version 1.0 * @version 1.0
*/ */
@Root
public class StandardVariantContextInputArgumentCollection { public class StandardVariantContextInputArgumentCollection {
/** /**

View File

@ -37,7 +37,6 @@ public class ValidationExclusion {
public enum TYPE { public enum TYPE {
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard
ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.contexts; package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation; import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup; import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List; import java.util.List;
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
*/ */
@Deprecated @Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory //todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); } public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
/** /**
* Are there any reads associated with this locus? * Are there any reads associated with this locus?
@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return * @return
*/ */
public boolean hasReads() { public boolean hasReads() {
return basePileup != null && basePileup.size() > 0 ; return basePileup != null && basePileup.getNumberOfElements() > 0 ;
} }
/** /**
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return * @return
*/ */
public int size() { public int size() {
return basePileup.size(); return basePileup.getNumberOfElements();
} }
/** /**

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.contexts; package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord; import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
return splitContextBySampleName(context, null); return splitContextBySampleName(context, null);
} }
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
m.put(new Sample(entry.getKey()), entry.getValue());
}
return m;
}
/** /**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead * Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
* of sample object. * of sample object.
@ -97,11 +88,11 @@ public class AlignmentContextUtils {
GenomeLoc loc = context.getLocation(); GenomeLoc loc = context.getLocation();
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>(); HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
for(String sample: context.getPileup().getSampleNames()) { for(String sample: context.getPileup().getSamples()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample); ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context. // Don't add empty pileups to the split context.
if(pileupBySample.size() == 0) if(pileupBySample.getNumberOfElements() == 0)
continue; continue;
if(sample != null) if(sample != null)

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers; package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator; import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collections; import java.util.Collections;
import java.util.List; import java.util.List;
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
* @param site Site at which to create the blank locus context. * @param site Site at which to create the blank locus context.
* @return empty context. * @return empty context.
*/ */
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList(); private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList(); private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
private AlignmentContext createEmptyLocus( GenomeLoc site ) { private AlignmentContext createEmptyLocus( GenomeLoc site ) {
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS)); return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));

View File

@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/ */
public FilePointer next() { public FilePointer next() {
FilePointer current = wrappedIterator.next(); FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0) while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next()); current = current.combine(parser,wrappedIterator.next());
return current; return current;
} }

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator; import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File; import java.io.File;
import java.lang.reflect.InvocationTargetException; import java.lang.reflect.InvocationTargetException;
@ -57,6 +58,8 @@ import java.util.*;
* Converts shards to SAM iterators over the specified region * Converts shards to SAM iterators over the specified region
*/ */
public class SAMDataSource { public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
/** Backing support for reads. */ /** Backing support for reads. */
protected final ReadProperties readProperties; protected final ReadProperties readProperties;
@ -235,6 +238,12 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values()) { for(SAMFileReader reader: readers.values()) {
// Get the sort order, forcing it to coordinate if unsorted. // Get the sort order, forcing it to coordinate if unsorted.
SAMFileHeader header = reader.getFileHeader(); SAMFileHeader header = reader.getFileHeader();
if ( header.getReadGroups().isEmpty() ) {
throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile,
"SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups");
}
SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate; SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate;
// Validate that all input files are sorted in the same order. // Validate that all input files are sorted in the same order.
@ -638,7 +647,9 @@ public class SAMDataSource {
BAQ.QualityMode qmode, BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader, IndexedFastaSequenceFile refReader,
byte defaultBaseQualities) { byte defaultBaseQualities) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities); if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
// only wrap if we are replacing the original qualitiies or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting // NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end of throwing away // as there is no reason to sort something that we will end of throwing away
@ -750,6 +761,7 @@ public class SAMDataSource {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) { public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) { for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile); SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.setSAMRecordFactory(factory);
reader.enableFileSource(true); reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false); reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding) if(!enableLowMemorySharding)

View File

@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
// intervals // intervals
GenomeLocSortedSet intervalSortedSet = null; GenomeLocSortedSet intervalSortedSet = null;
if(intervals != null) if(intervals != null)
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL); intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
else { else {
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser); intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences()) for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 2:09:16 PM
*/
/**
 * Bean describing one property definition from a sample file: a property name
 * together with the array of values that property is allowed to take.
 * Populated reflectively by the YAML loader, so fields have default visibility
 * and a no-arg constructor is implied.
 */
public class PropertyDefinition {

    // name of the property being constrained
    String property;
    // the permitted values for that property
    String[] values;

    /** @return the property name, or null if not yet set */
    public String getProperty() {
        return property;
    }

    /** @param property the property name to record */
    public void setProperty(String property) {
        this.property = property;
    }

    /** @return the allowed values, or null if not yet set */
    public String[] getValues() {
        return values;
    }

    /** @param values the allowed values to record */
    public void setValues(String[] values) {
        this.values = values;
    }
}

View File

@ -1,203 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:31:38 PM
*/
/**
 * Represents a single sample: a unique id plus free-form properties
 * (e.g. "gender", "population") and named relationships to other samples
 * (e.g. "mother", "father").
 */
public class Sample implements java.io.Serializable {

    private final String id;

    private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file

    private boolean hasSAMFileEntry = false; // true if this sample has an entry in the SAM file

    private HashMap<String, Object> properties = new HashMap<String, Object>();

    private HashMap<String, Sample> relationships = new HashMap<String, Sample>();

    /** Gender of a sample, stored under the "gender" property key. */
    public enum Gender {
        MALE,
        FEMALE,
        UNKNOWN
    }

    /**
     * @param id sample id; a null id is deliberately tolerated (SampleDataSource
     *           registers a special "null" sample)
     */
    public Sample(String id) {
        this.id = id;
    }

    /** @return this sample's id (may be null) */
    public String getId() {
        return this.id;
    }

    /** @return the live (mutable) properties map */
    public Map<String, Object> getProperties() {
        return properties;
    }

    /** Replaces the entire properties map; the argument must be a HashMap at runtime. */
    public void setProperties(Map<String, Object> properties) {
        this.properties = (HashMap) properties;
    }

    /** @return an unmodifiable view of this sample's relationships */
    public Map<String,Sample> getRelationships() {
        return Collections.unmodifiableMap(this.relationships);
    }

    public void setSampleFileEntry(boolean value) {
        this.hasSampleFileEntry = value;
    }

    public boolean hasSAMFileEntry() {
        return this.hasSAMFileEntry;
    }

    public void setSAMFileEntry(boolean value) {
        this.hasSAMFileEntry = value;
    }

    public boolean hasSampleFileEntry() {
        return this.hasSampleFileEntry;
    }

    /**
     * Get one property
     * @param key key of property
     * @return value of property as generic object, or null if unset
     */
    public Object getProperty(String key) {
        return properties.get(key);
    }

    /**
     * Set a property. If the property already exists, it is overwritten.
     * @param key key of property; must not collide with a relationship key
     * @param value object to be stored; "gender" must be a Sample.Gender,
     *              "population" must be a String
     */
    public void setProperty(String key, Object value) {
        if (relationships.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        if (key.equals("gender") && value.getClass() != Gender.class) {
            throw new StingException("'gender' property must be of type Sample.Gender");
        }
        if (key.equals("population") && value.getClass() != String.class) {
            throw new StingException("'population' property must be of type String");
        }
        properties.put(key, value);
    }

    /**
     * Get one relationship
     * @param key of relationship
     * @return Sample object that this relationship points to, or null if unset
     */
    public Sample getRelationship(String key) {
        return relationships.get(key);
    }

    /**
     * Set one relationship. If already set, it is overwritten.
     * @param key key of the relationship; must not collide with a property key
     * @param value Sample object this relationship points to
     */
    public void setRelationship(String key, Sample value) {
        if (properties.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        relationships.put(key, value);
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return getRelationship("mother");
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return getRelationship("father");
    }

    /**
     * Get gender of the sample
     * @return property of key "gender" - must be of type Gender
     */
    public Gender getGender() {
        return (Gender) properties.get("gender");
    }

    public String getPopulation() {
        return (String) properties.get("population");
    }

    public String getFamilyId() {
        return (String) properties.get("familyId");
    }

    /**
     * @return True if sample is male, false if female, unknown, or null
     */
    public boolean isMale() {
        return properties.get("gender") == Gender.MALE;
    }

    /**
     * @return True if sample is female, false if male, unknown or null
     */
    public boolean isFemale() {
        // BUGFIX: previously compared against Gender.MALE, making isFemale() a duplicate of isMale()
        return properties.get("gender") == Gender.FEMALE;
    }

    /**
     * @param key property key
     * @return true if sample has this property (even if its value is null)
     */
    public boolean hasProperty(String key) {
        return properties.containsKey(key);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Sample sample = (Sample) o;

        if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
        if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
        if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
        if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
        if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        // hash on id only, consistent with equals for the common case; null id hashes like ""
        return id != null ? id.hashCode() : "".hashCode();
    }
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 5:13:46 PM
*/
public class SampleAlias {
String mainId;
String[] otherIds;
public String getMainId() {
return mainId;
}
public void setMainId(String mainId) {
this.mainId = mainId;
}
public String[] getOtherIds() {
return otherIds;
}
public void setOtherIds(String[] otherIds) {
this.otherIds = otherIds;
}
}

View File

@ -1,590 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
*
*/
public class SampleDataSource {
/**
* SAMFileHeader that has been created for this analysis.
*/
private SAMFileHeader header;
/**
* This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
* this is stored as a HashMap.
*/
private final HashMap<String, Sample> samples = new HashMap<String, Sample>();
/**
* Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
* datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
*
* This maps ID => primary ID for all samples ID strings - both primary IDs and aliases.
*/
private HashMap<String, String> sampleAliases = new HashMap<String, String>();
/**
* While loading sample files, we must be aware of "special" properties and relationships that are always allowed
*/
public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};
public static final String[] specialRelationships = new String[] {"mother", "father"};
/**
 * Constructor takes both a SAM header and sample files because the two must be integrated:
 * every sample named in the SAM header gets a placeholder Sample object first, then each
 * sample file is layered on top in command-line order.
 * @param header SAMFileHeader that has been created for this analysis
 * @param sampleFiles Sample files that were included on the command line (may be null)
 */
public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
    this();     // installs the special "null" sample entry
    this.header = header;

    // create empty sample object for each sample referenced in the SAM header
    for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
        if (!hasSample(sampleName)) {
            Sample newSample = new Sample(sampleName);
            newSample.setSAMFileEntry(true);    // mark the sample as originating from the SAM header
            samples.put(sampleName, newSample);
        }
    }

    // add files consecutively, in the order given
    if (sampleFiles != null) {
        for (File file : sampleFiles) {
            addFile(file);
        }
    }
}
/**
 * Creates a data source containing only a special placeholder Sample registered
 * under the null key with a null id.
 */
public SampleDataSource() {
    samples.put(null, new Sample(null));
}
/**
 * Ensures every sample named in the given SAM header is registered, creating a
 * placeholder Sample object for each one not already present.
 * @param header SAM file header whose read-group samples should be registered
 */
public void addSamplesFromSAMHeader(SAMFileHeader header) {
    for (String name : SampleUtils.getSAMFileSamples(header)) {
        if (hasSample(name))
            continue;
        Sample created = new Sample(name);
        created.setSAMFileEntry(true);  // mark the sample as originating from the SAM header
        samples.put(name, created);
    }
}
/**
 * Parse one YAML sample file and integrate it with the samples already loaded.
 * Fails quickly if any error is found in the file: unknown properties or
 * relationships (when the file restricts them), disallowed property values,
 * or values that conflict with data loaded earlier.
 * @param sampleFile YAML sample file to load
 */
public void addFile(File sampleFile) {
    BufferedReader reader;
    try {
        reader = new BufferedReader(new FileReader(sampleFile));
    }
    catch (IOException e) {
        throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
    }

    // set up YAML reader - a "Constructor" creates java object from YAML and "Loader" loads the file
    Constructor con = new Constructor(SampleFileParser.class);
    TypeDescription desc = new TypeDescription(SampleFileParser.class);
    desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
    desc.putListPropertyType("sampleAliases", SampleAlias.class);
    con.addTypeDescription(desc);
    Yaml yaml = new Yaml(con);

    // SampleFileParser stores an object representation of a sample file - this is what we'll parse
    SampleFileParser parser;
    try {
        parser = (SampleFileParser) yaml.load(reader);
    }
    catch (Exception e) {
        throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
    }
    finally {
        // BUGFIX: the reader was previously never closed, leaking a file handle per sample file
        try { reader.close(); } catch (IOException ignored) { /* best effort; contents already read */ }
    }

    // check to see which validation options were built into the file
    boolean restrictProperties = parser.getAllowedProperties() != null;
    boolean restrictRelationships = parser.getAllowedRelationships() != null;
    boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;

    // propertyValues stores the values that are allowed for a given property
    HashMap<String, HashSet> propertyValues = null;
    if (restrictPropertyValues) {
        propertyValues = new HashMap<String, HashSet>();
        for (PropertyDefinition def : parser.getPropertyDefinitions()) {
            propertyValues.put(def.getProperty(), new HashSet<String>(Arrays.asList(def.getValues())));
        }
    }

    // make sure the aliases are valid
    validateAliases(parser);

    // loop through each sample in the file - a SampleParser stores an object that will become a Sample
    for (SampleParser sampleParser : parser.getSamples()) {
        try {
            // step 1: add the sample if it doesn't already exist
            Sample sample = getSampleById(sampleParser.getId());
            if (sample == null) {
                sample = new Sample(sampleParser.getId());
            }
            addSample(sample);
            sample.setSampleFileEntry(true);

            // step 2: add the properties
            if (sampleParser.getProperties() != null) {
                for (String property : sampleParser.getProperties().keySet()) {
                    Object newValue = sampleParser.getProperties().get(property);

                    // check that property is allowed
                    if (restrictProperties && !isPropertyValid(property, parser.getAllowedProperties())) {
                        throw new StingException(property + " is an invalid property. It is not included in the list " +
                                "of allowed properties.");
                    }

                    // next check that the value is allowed
                    if (restrictPropertyValues && !isValueAllowed(property, newValue, propertyValues)) {
                        throw new StingException("The value of property '" + property + "' is invalid. " +
                                "It is not included in the list of allowed values for this property.");
                    }

                    // next check that there isn't already a conflicting property there.
                    // BUGFIX: values were previously compared with reference identity (!=),
                    // which rejected logically equal values loaded from different files.
                    Object oldValue = sample.getProperty(property);
                    if (oldValue != null && !oldValue.equals(newValue)) {
                        throw new StingException(property + " is a conflicting property!");
                    }

                    // checks are passed - now add the property!
                    saveProperty(sample, property, newValue);
                }
            }

            // step 3: add the relationships
            if (sampleParser.getRelationships() != null) {
                for (String relationship : sampleParser.getRelationships().keySet()) {
                    String relativeId = sampleParser.getRelationships().get(relationship);
                    if (relativeId == null) {
                        throw new StingException("The relationship cannot be null");
                    }

                    // first check that it's not invalid
                    if (restrictRelationships && !isRelationshipValid(relationship, parser.getAllowedRelationships())) {
                        throw new StingException(relationship + " is an invalid relationship");
                    }

                    // next check that there isn't already a conflicting relationship there.
                    // BUGFIX: the conflict check previously looked the relationship key up in the
                    // *properties* map and compared ids by reference; compare the stored relative's
                    // id against relativeId with equals() instead.
                    Sample existingRelative = sample.getRelationship(relationship);
                    if (existingRelative != null) {
                        if (!relativeId.equals(existingRelative.getId())) {
                            throw new StingException(relationship + " is a conflicting relationship!");
                        }
                        // the relationship is already set - and consistent - no need to continue
                        continue;
                    }

                    // checks are passed - now save the relationship
                    saveRelationship(sample, relationship, relativeId);
                }
            }
        } catch (Exception e) {
            throw new StingException("An error occurred while loading this sample from the sample file: " +
                    sampleParser.getId(), e);
        }
    }
}
/**
 * Checks whether a property value is permitted by the file's value definitions.
 * @param key property name
 * @param value candidate value (must be non-null; callers pass parsed YAML values)
 * @param valuesList map from property name to the set of allowed string values
 * @return true if the property is unconstrained, or the value is an allowed String
 */
private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
    // properties with no explicit definition accept any value
    if (!valuesList.containsKey(key)) {
        return true;
    }
    // constrained properties must hold Strings...
    if (value.getClass() != String.class) {
        return false;
    }
    // ...and the String must be one of the declared values
    return valuesList.get(key).contains(value);
}
/**
 * Makes sure the alias declarations in a sample file are coherent:
 * 1) no string is used as both a main ID and an alias;
 * 2) no alias is used more than once.
 * @param parser parsed sample file whose aliases should be validated
 */
private void validateAliases(SampleFileParser parser) {
    if (parser.getSampleAliases() == null)
        return;     // nothing declared, nothing to validate

    HashSet<String> primaryIds = new HashSet<String>();
    HashSet<String> aliasIds = new HashSet<String>();
    for (SampleAlias alias : parser.getSampleAliases()) {
        primaryIds.add(alias.getMainId());
        for (String otherId : alias.getOtherIds()) {
            if (primaryIds.contains(otherId))
                throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
                        "be both a main ID and an other ID", otherId));
            // add() returns false when the alias was already present
            if (!aliasIds.add(otherId))
                throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
                        "alias more than once.", otherId));
        }
    }
}
/**
 * A property is valid when it is one of the always-allowed special properties
 * or appears in the file's allowed-properties list.
 * @param property property name to check
 * @param allowedProperties the file's declared allowed properties
 * @return true if the property may be used
 */
private boolean isPropertyValid(String property, String[] allowedProperties) {
    for (String[] candidates : new String[][] { specialProperties, allowedProperties }) {
        for (String candidate : candidates) {
            if (property.equals(candidate))
                return true;
        }
    }
    return false;
}
/**
 * A relationship is valid when it is one of the always-allowed special
 * relationships or appears in the file's allowed-relationships list.
 * @param relationship relationship name to check
 * @param allowedRelationships the file's declared allowed relationships
 * @return true if the relationship may be used
 */
private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
    for (String[] candidates : new String[][] { specialRelationships, allowedRelationships }) {
        for (String candidate : candidates) {
            if (relationship.equals(candidate))
                return true;
        }
    }
    return false;
}
/**
 * Saves a property on a sample, first converting it to the correct type.
 * Currently only "gender" needs conversion: its YAML string form becomes a
 * Sample.Gender value.
 * @param sample sample to modify
 * @param key property key
 * @param value property value, as read from YAML parser (may be null)
 */
private void saveProperty(Sample sample, String key, Object value) {
    // convert gender to the right type, if it was stored as a String.
    // BUGFIX: the original dereferenced value before its null check
    // (((String) value).toLowerCase() threw NPE on a null gender), although the
    // trailing "else if (value != null)" showed null was meant to be tolerated.
    if (key.equals("gender") && value != null) {
        String gender = ((String) value).toLowerCase();
        if (gender.equals("male")) {
            value = Sample.Gender.MALE;
        }
        else if (gender.equals("female")) {
            value = Sample.Gender.FEMALE;
        }
        else if (gender.equals("unknown")) {
            value = Sample.Gender.UNKNOWN;
        }
        else {
            throw new StingException("'gender' property must be male, female, or unknown.");
        }
    }
    try {
        sample.setProperty(key, value);
    }
    catch (Exception e) {
        throw new StingException("Could not save property " + key, e);
    }
}
/**
 * Saves a relationship on a sample. The stored value is a reference to the
 * relative's Sample object, which is created and registered on demand if the
 * relative has not been seen yet.
 * @param sample sample on which the relationship is stored
 * @param key relationship key
 * @param relativeId sample ID string of the relative
 */
private void saveRelationship(Sample sample, String key, String relativeId) {
    // getOrCreateSample() already implements the lookup-or-create-and-register
    // logic this method previously duplicated inline
    sample.setRelationship(key, getOrCreateSample(relativeId));
}
/**
 * Filter a sample name in case it is an alias.
 * @param sampleId ID to be filtered
 * @return the main sample ID that stores data for this alias, or the given
 *         ID unchanged when it is not an alias
 */
private String aliasFilter(String sampleId) {
    if (sampleAliases.containsKey(sampleId))
        return sampleAliases.get(sampleId);
    return sampleId;
}
/**
 * Add a sample to the collection, keyed by its own ID.
 * @param sample sample to be added
 */
private void addSample(Sample sample) {
    final String id = sample.getId();
    samples.put(id, sample);
}
/**
 * Check whether a sample with this ID exists.
 * Note that this returns true when the name passed in is an alias of an
 * existing sample.
 * @param id ID (or alias) of the sample to be checked
 * @return true if the sample exists; false if not
 */
public boolean hasSample(String id) {
    final Sample found = samples.get(aliasFilter(id));
    return found != null;
}
/**
 * Get a sample by its ID.
 * If an alias is passed in, the main sample object is returned.
 * @param id sample ID or alias
 * @return sample object with this ID, or null if none exists
 */
public Sample getSampleById(String id) {
    final String mainId = aliasFilter(id);
    return samples.get(mainId);
}
/**
 * Get the sample for a given read group.
 * Looks up the sample name recorded on the read group, then resolves it
 * through the normal ID lookup.
 * @param readGroup read group whose sample is wanted
 * @return sample object with the ID taken from the read group
 */
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
    return getSampleById(readGroup.getSample());
}
/**
 * Get the sample for a given read.
 * Resolves the read's read group first, then the sample registered for
 * that read group.
 * @param read read whose sample is wanted
 * @return sample object of this read
 */
public Sample getSampleByRead(SAMRecord read) {
    final SAMReadGroupRecord group = read.getReadGroup();
    return getSampleByReadGroup(group);
}
/**
 * Get the number of sample objects currently stored in this data source.
 * @return size of the samples map
 */
public int sampleCount() {
    return samples.size();
}
/**
 * Return all samples with a given family ID.
 * Note that this isn't terribly efficient (linear scan over all samples) -
 * it may be worth adding a dedicated family ID data structure for this.
 * @param familyId family ID to look up
 * @return set of every sample whose family ID matches
 */
public Set<Sample> getFamily(String familyId) {
    final HashSet<Sample> familyMembers = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        final String candidateFamily = candidate.getFamilyId();
        // samples without a family ID never match
        if (candidateFamily != null && candidateFamily.equals(familyId))
            familyMembers.add(candidate);
    }
    return familyMembers;
}
/**
 * Returns all children of a given sample.
 * See the note on the efficiency of getFamily() - since this depends on
 * getFamily(), it is also linear in the number of samples.
 * @param sample putative parent
 * @return set of family members whose mother or father is this sample
 */
public Set<Sample> getChildren(Sample sample) {
    final HashSet<Sample> children = new HashSet<Sample>();
    for (final Sample member : getFamily(sample.getFamilyId())) {
        // parent links are compared by reference identity, as in the original code
        final boolean isChild = member.getMother() == sample || member.getFather() == sample;
        if (isChild)
            children.add(member);
    }
    return children;
}
/**
 * Get every sample known to this data source.
 * @return a freshly-allocated set containing all sample objects
 */
public Set<Sample> getSamples() {
    return new HashSet<Sample>(samples.values());
}
/**
 * Takes a collection of sample names and returns their corresponding sample objects.
 * Note that, since a set is returned, if you pass in a list with duplicate names
 * there will not be any duplicates in the returned set.
 * @param sampleNameList collection of sample names
 * @return corresponding set of samples
 */
public Set<Sample> getSamples(Collection<String> sampleNameList) {
    final HashSet<Sample> result = new HashSet<Sample>();
    for (final String name : sampleNameList) {
        try {
            result.add(getSampleById(name));
        }
        catch (Exception e) {
            throw new StingException("Could not get sample with the following ID: " + name, e);
        }
    }
    return result;
}
/**
 * Returns the set of samples that have any value (which could be null) for
 * a given property.
 * @param key property key
 * @return set of samples carrying the property
 */
public Set<Sample> getSamplesWithProperty(String key) {
    final HashSet<Sample> matching = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        if (candidate.hasProperty(key))
            matching.add(candidate);
    }
    return matching;
}
/**
 * Returns a set of samples that have a property with a certain value.
 * Value must be a string for now - could add a similar method for matching
 * any objects in the future.
 *
 * @param key property key
 * @param value String property value; must not be null
 * @return set of samples that match key and value
 */
public Set<Sample> getSamplesWithProperty(String key, String value) {
    // Build a new set instead of removing elements from the set being
    // iterated: the previous version called toReturn.remove(s) inside a
    // for-each over toReturn, which throws ConcurrentModificationException
    // as soon as one sample has the property with a non-matching value.
    Set<Sample> toReturn = new HashSet<Sample>();
    for (Sample s : getSamplesWithProperty(key)) {
        // value.equals(...) also tolerates samples that store a null property
        // value (documented as possible by getSamplesWithProperty(key)),
        // which previously caused a NullPointerException
        if (value.equals(s.getProperty(key)))
            toReturn.add(s);
    }
    return toReturn;
}
/**
 * Get the sample with the given ID, creating and registering a new Sample
 * object if none exists yet.
 * @param id sample ID (or alias) to resolve
 * @return the existing or newly-created sample
 */
public Sample getOrCreateSample(String id) {
    final Sample existing = getSampleById(id);
    if (existing != null)
        return existing;

    final Sample created = new Sample(id);
    addSample(created);
    return created;
}
/**
 * Returns all samples that were referenced in the SAM file.
 * @return set of samples with a SAM file entry
 */
public Set<Sample> getSAMFileSamples() {
    final Set<Sample> fromSamFile = new HashSet<Sample>();
    for (final Sample candidate : samples.values()) {
        if (candidate.hasSAMFileEntry())
            fromSamFile.add(candidate);
    }
    return fromSamFile;
}
/**
 * Returns a set of sample objects for the sample names in a variant context.
 * Sample objects are created and registered on the fly for names not seen before.
 * @param context any variant context
 * @return a set of the sample objects
 */
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
    final Set<Sample> contextSamples = new HashSet<Sample>();
    for (final String sampleName : context.getSampleNames())
        contextSamples.add(getOrCreateSample(sampleName));
    return contextSamples;
}
/**
 * Return a subcontext restricted to samples with a given property key/value.
 * Gets the sample names matching key/value and relies on
 * VariantContext.subContextFromGenotypes for the filtering.
 * NOTE(review): the lookup reads the samples map directly, so aliases are NOT
 * resolved here (unlike getSampleById) - confirm that is intended.
 * @param context VariantContext to filter
 * @param key property key
 * @param value property value (must be string)
 * @return subcontext containing only the matching samples' genotypes
 */
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
    final Set<String> matchingNames = new HashSet<String>();
    for (final String sampleName : context.getSampleNames()) {
        final Sample s = samples.get(sampleName);
        // unknown samples and samples lacking the property are skipped
        final boolean matches = s != null && s.hasProperty(key) && s.getProperty(key).equals(value);
        if (matches)
            matchingNames.add(sampleName);
    }
    final Map<String, Genotype> genotypes = context.getGenotypes(matchingNames);
    return context.subContextFromGenotypes(genotypes.values());
}
/**
 * Creates a SampleDataSource backed by a fresh, empty SAM file header and no
 * sample file.
 * @return an empty sample data source
 */
public static SampleDataSource createEmptyDataSource() {
    return new SampleDataSource(new SAMFileHeader(), null);
}
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
 * Bean mapping the top-level structure of a YAML sample file: the sample
 * entries themselves plus the declared aliases, the allowed property keys,
 * the allowed relationship keys, and the property definitions.
 *
 * User: brett
 * Date: Aug 12, 2010
 * Time: 1:30:44 PM
 */
public class SampleFileParser {

    private SampleAlias[] sampleAliases;
    private String[] allowedProperties;
    private String[] allowedRelationships;
    private PropertyDefinition[] propertyDefinitions;
    private SampleParser[] samples;

    /** No-arg constructor required by the YAML parser. */
    public SampleFileParser() {
    }

    public SampleAlias[] getSampleAliases() {
        return sampleAliases;
    }

    public void setSampleAliases(SampleAlias[] sampleAliases) {
        this.sampleAliases = sampleAliases;
    }

    public String[] getAllowedProperties() {
        return allowedProperties;
    }

    public void setAllowedProperties(String[] allowedProperties) {
        this.allowedProperties = allowedProperties;
    }

    public String[] getAllowedRelationships() {
        return allowedRelationships;
    }

    public void setAllowedRelationships(String[] allowedRelationships) {
        this.allowedRelationships = allowedRelationships;
    }

    public PropertyDefinition[] getPropertyDefinitions() {
        return propertyDefinitions;
    }

    public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
        this.propertyDefinitions = propertyDefinitions;
    }

    public SampleParser[] getSamples() {
        return samples;
    }

    public void setSamples(SampleParser[] samples) {
        this.samples = samples;
    }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import java.util.HashMap;
/**
 * Bean mapping a single sample entry in a YAML sample file: the sample's ID
 * plus free-form property and relationship maps.
 *
 * User: brett
 * Date: Aug 13, 2010
 * Time: 2:09:43 PM
 */
public class SampleParser {

    private String id;
    private HashMap<String, Object> properties;
    private HashMap<String, String> relationships;

    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }

    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }
}

View File

@ -85,12 +85,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/ */
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) { protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods); super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse); this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
if (engine.getArguments().processingTrackerFile != null) {
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
}
} }
public Object execute( Walker walker, ShardStrategy shardStrategy ) { public Object execute( Walker walker, ShardStrategy shardStrategy ) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker; import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker; import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.Collection; import java.util.Collection;
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary(); traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) { if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata()); WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) { for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods); ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit()); Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -62,16 +62,17 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit(); Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker; LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here? WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
ShardDataProvider dataProvider = null; microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
for(WindowMaker.WindowMakerIterator iterator: windowMaker) { for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods); final ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator ); accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
dataProvider.close(); dataProvider.close();
} }
if (dataProvider != null) dataProvider.close();
windowMaker.close(); windowMaker.close();
outputMergeTask = outputTracker.closeStorage(); outputMergeTask = outputTracker.closeStorage();

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard; import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState; import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator; import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;
import java.util.Iterator; import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.NoSuchElementException; import java.util.NoSuchElementException;
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals. * the given intervals.
* @param iterator The data source for this window. * @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse. * @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with * @param sampleNames The complete set of sample names in the reads in shard
*/ */
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) { public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
this.sourceInfo = shard.getReadProperties(); this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator; this.readIterator = iterator;
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null; this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
} }
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() { public Iterator<WindowMakerIterator> iterator() {
return this; return this;
} }

View File

@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMFileHeader; import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
/** /**
* Filter out malformed reads. * Filter out malformed reads.
@ -37,14 +39,25 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
*/ */
public class MalformedReadFilter extends ReadFilter { public class MalformedReadFilter extends ReadFilter {
private SAMFileHeader header; private SAMFileHeader header;
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
boolean filterMismatchingBaseAndQuals = false;
@Override @Override
public void initialize(GenomeAnalysisEngine engine) { public void initialize(GenomeAnalysisEngine engine) {
this.header = engine.getSAMFileHeader(); this.header = engine.getSAMFileHeader();
} }
public boolean filterOut(SAMRecord read) { public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) || // slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided
if (!checkMismatchingBasesAndQuals(read)) {
if (!filterMismatchingBaseAndQuals)
throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals");
else
return true;
}
return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) || !checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(this.header,read) || !checkAlignmentDisagreesWithHeader(this.header,read) ||
!checkCigarDisagreesWithAlignment(read); !checkCigarDisagreesWithAlignment(read);
@ -108,4 +121,13 @@ public class MalformedReadFilter extends ReadFilter {
return false; return false;
return true; return true;
} }
/**
* Check if the read has the same number of bases and base qualities
* @param read the read to validate
* @return true if they have the same number. False otherwise.
*/
private static boolean checkMismatchingBasesAndQuals(SAMRecord read) {
return (read.getReadLength() == read.getBaseQualities().length);
}
} }

View File

@ -0,0 +1,23 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
* Created by IntelliJ IDEA.
* User: chartl
* Date: 9/19/11
* Time: 4:09 PM
* To change this template use File | Settings | File Templates.
*/
public class ReadNameFilter extends ReadFilter {
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
private String readName;
public boolean filterOut(final SAMRecord rec) {
return ! rec.getReadName().equals(readName);
}
}

View File

@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) { else if ( stub.getOutputStream() != null ) {
this.file = null; this.file = null;
this.stream = stub.getOutputStream(); this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes()); writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
} }
else else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream."); throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
} }
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it. // The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes()); return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
} }

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs; package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord; import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable; import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed; return isCompressed;
} }
/**
* Gets the master sequence dictionary from the engine associated with this stub
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
}
/** /**
* Should we tell the VCF writer not to write genotypes? * Should we tell the VCF writer not to write genotypes?
* @return true if the writer should not write genotypes. * @return true if the writer should not write genotypes.

View File

@ -35,26 +35,23 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod; import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties; import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler; import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement; import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement; import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*; import java.util.*;
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */ /** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
public class LocusIteratorByState extends LocusIterator { public class LocusIteratorByState extends LocusIterator {
// private static long discarded_bases = 0L;
// private static long observed_bases = 0L;
/** our log, which we want to capture anything from this class */ /** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class); private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs. * Used to create new GenomeLocs.
*/ */
private final GenomeLocParser genomeLocParser; private final GenomeLocParser genomeLocParser;
private final ArrayList<Sample> samples; private final ArrayList<String> samples;
private final ReadStateManager readStates; private final ReadStateManager readStates;
static private class SAMRecordState { static private class SAMRecordState {
@ -278,15 +275,27 @@ public class LocusIteratorByState extends LocusIterator {
// //
// ----------------------------------------------------------------------------------------------------------------- // -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) { public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
this.readInfo = readInformation; this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser; this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// get the list of samples // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
this.samples = new ArrayList<Sample>(sampleData.getSamples()); // there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod()); throw new IllegalArgumentException("samples list must not be empty");
}
}
/**
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
* for the system.
*/
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
List<String> samples = new ArrayList<String>();
samples.add(null);
return samples;
} }
public Iterator<AlignmentContext> iterator() { public Iterator<AlignmentContext> iterator() {
@ -303,19 +312,6 @@ public class LocusIteratorByState extends LocusIterator {
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r); //if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
} }
public void printState() {
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
}
}
private GenomeLoc getLocation() { private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser); return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
} }
@ -355,14 +351,14 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base // In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position. // and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) { if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>(); Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
// get current location on the reference and decrement it by 1: the indels we just stepped over // get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base // are associated with the *previous* reference base
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1); GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
boolean hasBeenSampled = false; boolean hasBeenSampled = false;
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample); Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample)); List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample); hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
@ -382,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength()); maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
} }
else nInsertions++; else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(), indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
} else { } else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position // HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
@ -407,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are // we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true. // about to report) only if includeReadsWithDeletionAtLoci is true.
size++; size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(), indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
state.getReadOffset()-1,
-1) // length=-1 --> noevent
); );
} }
} }
@ -426,10 +417,10 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled); nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else { } else {
GenomeLoc location = getLocation(); GenomeLoc location = getLocation();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>(); Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
boolean hasBeenSampled = false; boolean hasBeenSampled = false;
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample); Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample)); List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample); hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
@ -447,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
continue; continue;
} else { } else {
//observed_bases++; //observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset())); pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
size++; size++;
} }
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) { } else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++; size++;
pile.add(new PileupElement(state.getRead(), -1)); pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
nDeletions++; nDeletions++;
} }
@ -495,7 +486,7 @@ public class LocusIteratorByState extends LocusIterator {
} }
private void updateReadStates() { private void updateReadStates() {
for(Sample sample: samples) { for(final String sample: samples) {
Iterator<SAMRecordState> it = readStates.iterator(sample); Iterator<SAMRecordState> it = readStates.iterator(sample);
while ( it.hasNext() ) { while ( it.hasNext() ) {
SAMRecordState state = it.next(); SAMRecordState state = it.next();
@ -522,7 +513,7 @@ public class LocusIteratorByState extends LocusIterator {
private final PeekableIterator<SAMRecord> iterator; private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod; private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner; private final SamplePartitioner samplePartitioner;
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>(); private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage; private final int targetCoverage;
private int totalReadStates = 0; private int totalReadStates = 0;
@ -540,9 +531,9 @@ public class LocusIteratorByState extends LocusIterator {
} }
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>(); Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(Sample sample: samples) { for(final String sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager()); readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector()); readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
} }
samplePartitioner = new SamplePartitioner(readSelectors); samplePartitioner = new SamplePartitioner(readSelectors);
@ -554,7 +545,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample. * @param sample The sample.
* @return Iterator over the reads associated with that sample. * @return Iterator over the reads associated with that sample.
*/ */
public Iterator<SAMRecordState> iterator(final Sample sample) { public Iterator<SAMRecordState> iterator(final String sample) {
return new Iterator<SAMRecordState>() { return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator(); private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
@ -590,7 +581,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample. * @param sample The sample.
* @return Total number of reads in the given sample. * @return Total number of reads in the given sample.
*/ */
public int size(final Sample sample) { public int size(final String sample) {
return readStatesBySample.get(sample).size(); return readStatesBySample.get(sample).size();
} }
@ -600,12 +591,12 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample Sample, downsampled independently. * @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region. * @return Integer stop of the furthest undownsampled region.
*/ */
public int getDownsamplingExtent(final Sample sample) { public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent(); return readStatesBySample.get(sample).getDownsamplingExtent();
} }
public SAMRecordState getFirst() { public SAMRecordState getFirst() {
for(Sample sample: samples) { for(final String sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample); PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty()) if(!reads.isEmpty())
return reads.peek(); return reads.peek();
@ -639,8 +630,8 @@ public class LocusIteratorByState extends LocusIterator {
} }
samplePartitioner.complete(); samplePartitioner.complete();
for(Sample sample: samples) { for(final String sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId()); ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads()); Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
@ -1072,6 +1063,3 @@ class SamplePartitioner implements ReadSelector {
} }
} }

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* An iterator which does post-processing of a read, including potentially wrapping * An iterator which does post-processing of a read, including potentially wrapping
@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists. * no next exists.
*/ */
public SAMRecord next() { public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities); SAMRecord rec = wrappedIterator.next();
// if we are using default quals, check if we need them, and add if necessary.
// 1. we need if reads are lacking or have incomplete quality scores
// 2. we add if defaultBaseQualities has a positive value
if (defaultBaseQualities >= 0) {
byte reads [] = rec.getReadBases();
byte quals [] = rec.getBaseQualities();
if (quals == null || quals.length < reads.length) {
byte new_quals [] = new byte [reads.length];
for (int i=0; i<reads.length; i++)
new_quals[i] = defaultBaseQualities;
rec.setBaseQualities(new_quals);
}
}
// if we are using original quals, set them now if they are present in the record
if ( useOriginalBaseQualities ) {
byte[] originalQuals = rec.getOriginalBaseQualities();
if ( originalQuals != null )
rec.setBaseQualities(originalQuals);
}
return rec;
} }
/** /**

View File

@ -28,7 +28,6 @@ import org.apache.log4j.Level;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.Walker; import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -101,9 +100,6 @@ public class GATKRunReport {
@Element(required = false, name = "exception") @Element(required = false, name = "exception")
private final ExceptionToXML mException; private final ExceptionToXML mException;
@Element(required = false, name = "argument_collection")
private final GATKArgumentCollection mCollection;
@Element(required = true, name = "working_directory") @Element(required = true, name = "working_directory")
private String currentPath; private String currentPath;
@ -187,7 +183,6 @@ public class GATKRunReport {
cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker); cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker);
} catch (Exception ignore) { } } catch (Exception ignore) { }
this.mCollection = engine.getArguments();
walkerName = engine.getWalkerName(walker.getClass()); walkerName = engine.getWalkerName(walker.getClass());
svnVersion = CommandLineGATK.getVersionNumber(); svnVersion = CommandLineGATK.getVersionNumber();
@ -293,15 +288,16 @@ public class GATKRunReport {
     * That is, postReport() is guaranteed not to fail for any reason.      * That is, postReport() is guaranteed not to fail for any reason.
*/ */
private File postReportToLocalDisk(File rootDir) { private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
try { try {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
postReportToFile(file); postReportToFile(file);
logger.debug("Wrote report to " + file); logger.debug("Wrote report to " + file);
return file; return file;
} catch ( Exception e ) { } catch ( Exception e ) {
// we catch everything, and no matter what eat the error // we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
file.delete();
return null; return null;
} }
} }
@ -312,6 +308,7 @@ public class GATKRunReport {
File localFile = postReportToLocalDisk(new File("./")); File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile); logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file if ( localFile != null ) { // we succeeded in creating the local file
localFile.deleteOnExit();
try { try {
// stop us from printing the annoying, and meaningless, mime types warning // stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class); Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
@ -336,14 +333,13 @@ public class GATKRunReport {
//logger.info("Uploading " + localFile + " to AWS bucket"); //logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject); S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object); logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) { } catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e); exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) { } catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e); exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) { } catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e); exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
} }
} }
} }

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.indexer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
/**
 * A utility class that can create a Tribble index for a reference meta data (RMD) file,
 * written to a target location. This is useful when you're unable to write to the directory
 * in which an index is located, or if you'd like to pre-index files to save time.
 */
public class RMDIndexer extends CommandLineProgram {
    @Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
    File inputFileSource = null;

    @Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
    String inputFileType = null;

    @Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
    public File referenceFile = null;

    @Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
    public File indexFile = null;

    @Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
    IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;

    private static Logger logger = Logger.getLogger(RMDIndexer.class);

    // reference sequence and the location parser built from it; set up in execute()
    private IndexedFastaSequenceFile ref = null;
    private GenomeLocParser genomeLocParser = null;

    /**
     * Validates the command-line arguments, builds the index in memory, embeds the
     * reference sequence dictionary, and writes the index to {@code indexFile}.
     *
     * @return 0 on success
     * @throws IllegalArgumentException if the arguments are unusable or the index already exists
     * @throws Exception if index creation or writing fails
     */
    @Override
    protected int execute() throws Exception {
        // check parameters
        // ---------------------------------------------------------------------------------
        // check the input parameters
        if (referenceFile != null && !referenceFile.canRead())
            throw new IllegalArgumentException("We can't read the reference file: "
                    + referenceFile + ", check that it exists, and that you have permissions to read it");

        // set the index file to the default name (<inputFile> + standard extension) if they didn't specify a file
        if (indexFile == null && inputFileSource != null)
            indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);

        // we refuse to clobber an existing index; a null indexFile here means we couldn't
        // even derive a location (shouldn't happen since inputFile is a required argument)
        if (indexFile == null)
            throw new IllegalArgumentException("No index file location was supplied, and none could be derived from the input file");
        if (indexFile.exists())
            throw new IllegalArgumentException("We can't write to the index file location: "
                    + indexFile + ", the index exists");

        logger.info(String.format("attempting to index file: %s", inputFileSource));
        logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
        logger.info(String.format("using type: %s", inputFileType));
        logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));

        // try to index the file
        // ---------------------------------------------------------------------------------
        // setup the reference
        ref = new CachingIndexedFastaSequenceFile(referenceFile);
        genomeLocParser = new GenomeLocParser(ref);

        // get a track builder
        RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);

        // find the types available to the track builders
        FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);

        // check that the type is valid
        if (descriptor == null)
            throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());

        // create the codec
        FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);

        // reference-dependent codecs need the location parser before they can decode records
        if (codec instanceof ReferenceDependentFeatureCodec)
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);

        // get some timing info
        long currentTime = System.currentTimeMillis();
        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);

        // add writing of the sequence dictionary, if supplied
        builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);

        // create the output stream and write the index; close in finally so the file
        // handle is not leaked if the write fails part-way through
        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
        try {
            index.write(stream);
        } finally {
            stream.close();
        }

        // report and exit
        logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
        return 0; // return successfully
    }

    /**
     * Command-line entry point: runs the indexer and exits with its result code.
     *
     * @param argv the arguments from the command line
     */
    public static void main(String[] argv) {
        try {
            RMDIndexer instance = new RMDIndexer();
            start(instance, argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Utilities for working with sequence dictionaries embedded in Tribble indices.
 *
 * Dictionary entries are stored in the index property list under keys prefixed
 * with {@link #SequenceDictionaryPropertyPredicate}, mapping contig name to length.
 */
public class IndexDictionaryUtils {
    private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);

    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";

    /**
     * Get the sequence dictionary stored in the index's property list, if available.
     *
     * @param index the index file to use
     * @return a SAMSequenceDictionary built from every "DICT:"-prefixed property;
     *         empty (never null) when the index carries no dictionary properties
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
            if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
                // the contig name follows the "DICT:" prefix; the property value holds the contig length
                dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
                        Integer.valueOf(entry.getValue())));
        }
        return dict;
    }

    /**
     * Create the sequence dictionary from the contig list that is always in the index;
     * a backup approach when no dictionary properties are present. Contig lengths are
     * unknown here, so each sequence is recorded with length 0.
     *
     * @param index the index file to use
     * @param dict the sequence dictionary to add contigs to
     * @return the filled-in sequence dictionary
     */
    static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        LinkedHashSet<String> seqNames = index.getSequenceNames();
        if (seqNames == null) {
            return dict;
        }
        for (String name : seqNames) {
            SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
            dict.addSequence(seq);
        }
        return dict;
    }

    /**
     * Record the given sequence dictionary in the index's property list, one
     * "DICT:&lt;contig&gt;" -&gt; length entry per sequence.
     *
     * @param index the index to annotate
     * @param dict the sequence dictionary to record
     */
    public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
        for ( SAMSequenceRecord seq : dict.getSequences() ) {
            final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
            final String length = String.valueOf(seq.getSequenceLength());
            index.addProperty(contig,length);
        }
    }

    /**
     * Validate the track's sequence dictionary against the reference's, delegating to
     * {@code SequenceDictionaryUtils.validateDictionaries}. Tracks without a built-in
     * dictionary (null or empty) are skipped with an informational log message.
     *
     * @param trackName name of the track, used in log/error messages
     * @param trackDict the track's dictionary; may be null or empty
     * @param referenceDict the reference's dictionary
     * @param validationExclusionType exclusions that may relax validation
     */
    public static void validateTrackSequenceDictionary(final String trackName,
                                                       final SAMSequenceDictionary trackDict,
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
        if (trackDict == null || trackDict.size() == 0)
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
        }
    }
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.refdata.tracks; package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary; import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec; import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource; import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType; import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser; import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File; import java.io.File;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/** /**
* *
* @author aaron * @author aaron
* ` * `
* Class RMDTrackBuilder * Class RMDTrackBuilder
* *
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class); private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false; public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
// private sequence dictionary we use to set our tracks with // private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null; private SAMSequenceDictionary dict = null;
@ -150,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass); final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if (descriptor == null) if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName()); throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags())); return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
} }
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); } try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { } catch (ReviewedStingException e) { }
sequenceDictionary = getSequenceDictionaryFromProperties(index); sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match // if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
if (sequenceDictionary.size() == 0 && dict != null) { if (sequenceDictionary.size() == 0 && dict != null) {
File indexFile = Tribble.indexFile(inputFile); File indexFile = Tribble.indexFile(inputFile);
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true); validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
sequenceDictionary = getSequenceDictionaryFromProperties(index); try { // re-write the index
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
} }
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE ) if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
// this can take a while, let them know what we're doing // this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile); logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME); Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
setIndexSequenceDictionary(inputFile, idx, dict, null, false); validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx; return idx;
} }
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes
// ---------------------------------------------------------------------------------------------------------
/**
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
* @param index the index file to use
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/** /**
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible. * set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
* (that each contig in the index is in the sequence dictionary). * (that each contig in the index is in the sequence dictionary).
* @param inputFile for proper error message formatting. * @param inputFile for proper error message formatting.
* @param dict the sequence dictionary * @param dict the sequence dictionary
* @param index the index file * @param index the index file
* @param indexFile the index file
* @param rewriteIndex should we rewrite the index when we're done?
*
*/ */
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) { public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
if (dict == null) return; if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set // check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) { final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
if (dict.getSequence(seq.getSequenceName()) == null) validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
continue;
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength())); // actually update the dictionary in the index
} IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
// re-write the index
if (rewriteIndex) try {
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
} }
public void validateTrackSequenceDictionary(final String trackName,
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) { final SAMSequenceDictionary trackDict,
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation final SAMSequenceDictionary referenceDict ) {
if (trackDict == null || trackDict.size() == 0) IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
} }
} }

View File

@ -1,57 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
*
* @author aaron
*
* Class RMDIntervalGenerator
*
* Creates an interval list, given an RMDTrack
*/
public class RMDIntervalGenerator {
    public ReferenceOrderedDataSource dataSource;

    /**
     * Wraps a ROD track so its records can be viewed as a list of intervals.
     *
     * @param dataSource the track; must not be null
     */
    public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
        if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
        this.dataSource = dataSource;
    }

    /**
     * Builds the list of genome locations covered by the underlying track,
     * in track order.  Records without a location are skipped.
     *
     * @return a list of genome locations
     */
    public List<GenomeLoc> toGenomeLocList() {
        final List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
        final Iterator<RODRecordList> records = dataSource.seek((GenomeLoc) null);
        while (records.hasNext()) {
            final GenomeLoc location = records.next().getLocation();
            if (location != null)
                locations.add(location);
        }
        return locations;
    }

    /**
     * Indexes a list of reference ordered data sources by their track name.
     *
     * @param sources the reference ordered data sources to get the names from
     * @return a map from reference meta data track name to its data source
     */
    public static Map<String, ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
        final Map<String, ReferenceOrderedDataSource> rodNames = new HashMap<String, ReferenceOrderedDataSource>();
        for (final ReferenceOrderedDataSource rod : sources)
            rodNames.put(rod.getName(), rod);
        return rodNames;
    }
}

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report; package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*; import java.util.*;
/** /**
* Tracks a linked list of GATKReportColumn in order by name. * Tracks a linked list of GATKReportColumn in order by name.
*/ */
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> { public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>(); private List<String> columnNames = new ArrayList<String>();
/** /**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key); columnNames.add(key);
return super.put(key, value); return super.put(key, value);
} }
@Override
public Iterator<GATKReportColumn> iterator() {
return new Iterator<GATKReportColumn>() {
int offset = 0;
public boolean hasNext() { return offset < columnNames.size() ; }
public GATKReportColumn next() { return getByIndex(offset++); }
public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
};
}
} }

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
} }
} }
    /**
     * Checks whether a row with the given primary key exists in this table.
     *
     * @param primaryKey the primary key value to look for
     * @return true if the primary key column contains the given value
     */
    public boolean containsKey(Object primaryKey) {
        return primaryKeyColumn.contains(primaryKey);
    }
/** /**
* Set the value for a given position in the table * Set the value for a given position in the table
* *

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* Categorical sample trait for association and analysis
*
* Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having an
* other trait value (stored in an associated value in the Sample class)
*
* @author Mark DePristo
* @since Sept. 2011
*/
public enum Affection {
    /** Affection status is unknown / missing */
    UNKNOWN,
    /** Suffers from the disease (affected by the categorical trait) */
    AFFECTED,
    /** Unaffected by the disease */
    UNAFFECTED,
    /** An "other" trait: the value of the trait is stored elsewhere (as an arbitrary string in the Sample class) */
    OTHER
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* ENUM of possible human genders: male, female, or unknown
*/
public enum Gender {
    /** Sample is male */
    MALE,
    /** Sample is female */
    FEMALE,
    /** Sex of the sample is not known */
    UNKNOWN
}

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.*;
/**
* Reads PED file-formatted tabular text files
*
* See http://www.broadinstitute.org/mpg/tagger/faq.html
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
*
* The "ped" file format refers to the widely-used format for linkage pedigree data.
* Each line describes a single (diploid) individual in the following format:
*
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
*
* If your data lacks pedigree information (for example, unrelated case/control individuals),
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
* Check out the information at the PLINK website on the "ped" file format.
*
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
* Family ID
* Individual ID
* Paternal ID
* Maternal ID
* Sex (1=male; 2=female; other=unknown)
* Phenotype
*
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
* and are actually representing these values as strings instead of doubles
*
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394
*
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
* and an error message will arise if an analysis that uses family information is requested and an
* individual of 'unknown' sex is specified as a father or mother.
*
*
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.
*
* Affection status, by default, should be coded:
* -9 missing
* 0 missing
* 1 unaffected
* 2 affected
*
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
* plink --file mydata --1 which will specify a disease phenotype coded:
*
* -9 missing
* 0 unaffected
* 1 affected
*
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
*
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
*
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
*
* FAM001 1 0 0 1 2 A A G G A C
* FAM001 2 0 0 1 2 A A A G 0 0
* ...
*
* Note that the GATK does not support genotypes in a PED file.
*
* @author Mark DePristo
* @since 2011
*/
public class PedReader {
    private static Logger logger = Logger.getLogger(PedReader.class);

    /** Phenotype values interpreted as a categorical (affection) trait; any other value marks the trait as quantitative. */
    final static private Set<String> CATEGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
    final static private String commentMarker = "#";

    /**
     * An enum that specifies which, if any, of the standard PED fields are
     * missing from the input records.  For example, suppose we have the full record:
     *
     * "fam1 kid dad mom 1 2"
     *
     * indicating a male affected child.  This can be parsed with the -ped x.ped argument
     * to the GATK.  Suppose we only have:
     *
     * "fam1 kid 1"
     *
     * we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
     */
    public enum MissingPedField {
        /**
         * The PED records do not have the first (FAMILY_ID) argument.  The family id
         * will be set to null / empty.
         */
        NO_FAMILY_ID,

        /**
         * The PED records do not have either the paternal or maternal IDs, so
         * the corresponding IDs are set to null.
         */
        NO_PARENTS,

        /**
         * The PED records do not have the GENDER field, so the sex of each
         * sample will be set to UNKNOWN.
         */
        NO_SEX,

        /**
         * The PED records do not have the PHENOTYPE field, so the phenotype
         * of each sample will be set to UNKNOWN.
         */
        NO_PHENOTYPE
    }

    /** The six standard PED columns, in file order. */
    protected enum Field {
        FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
    }

    // phenotype codes
    private final static String MISSING_VALUE1 = "-9";
    private final static String MISSING_VALUE2 = "0";
    private final static String PHENOTYPE_UNAFFECTED = "1";
    private final static String PHENOTYPE_AFFECTED = "2";

    // sex codes; any other value means unknown
    private final static String SEX_MALE = "1";
    private final static String SEX_FEMALE = "2";

    public PedReader() { }

    /**
     * Parses a PED file, adding each parsed record to sampleDB.
     *
     * @param source        the PED file to read
     * @param missingFields which standard PED fields are absent from the records
     * @param sampleDB      the database to add parsed samples to
     * @return the list of samples parsed (including implicitly created parents)
     * @throws FileNotFoundException if source cannot be opened
     */
    public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
        logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
        return parse(new FileReader(source), missingFields, sampleDB);
    }

    /**
     * Parses an inline PED string; ';' separates records (each becomes one line).
     */
    public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
        return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
    }

    /**
     * Parses PED records from reader, adding each parsed sample to sampleDB.
     * Two passes: the first splits the lines and decides whether the phenotype
     * column is quantitative; the second builds the Sample objects.
     */
    public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        final List<String> lines = new XReadLines(reader).readLines();

        // Compute the column offset of each field; -1 marks a field declared missing
        final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
        final int samplePos = familyPos + 1;
        final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
        final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
        final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
        final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
        final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;

        // first pass: split the records and determine whether the phenotype is quantitative
        int lineNo = 1;
        boolean isQT = false;
        final List<String[]> splits = new ArrayList<String[]>(lines.size());
        for ( final String line : lines ) {
            if ( line.startsWith(commentMarker)) continue;
            if ( line.trim().equals("") ) continue;

            final String[] parts = line.split("\\s+");

            if ( parts.length != nExpectedFields )
                throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

            if ( phenotypePos != -1 ) {
                // any value outside the categorical codes makes the whole file quantitative
                isQT = isQT || ! CATEGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
            }
            splits.add(parts);
            lineNo++;
        }
        logger.info("Phenotype is other? " + isQT);

        // second pass: parse each record into a Sample
        lineNo = 1;
        final List<Sample> samples = new ArrayList<Sample>(splits.size());
        for ( final String[] parts : splits ) {
            String familyID = null, individualID, paternalID = null, maternalID = null;
            Gender sex = Gender.UNKNOWN;
            String quantitativePhenotype = Sample.UNSET_QT;
            Affection affection = Affection.UNKNOWN;

            if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
            individualID = parts[samplePos];
            if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
            if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);

            if ( sexPos != -1 ) {
                if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
                else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
                else sex = Gender.UNKNOWN;
            }

            if ( phenotypePos != -1 ) {
                if ( isQT ) {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) )
                        affection = Affection.UNKNOWN;
                    else {
                        affection = Affection.OTHER;
                        quantitativePhenotype = parts[phenotypePos];
                    }
                } else {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
                    else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
                }
            }

            final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
            samples.add(s);
            sampleDB.addSample(s);
            lineNo++;
        }

        // create implicit parent samples that are referenced by children but have no record of their own
        for ( final Sample sample : new ArrayList<Sample>(samples) ) {
            Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
            if ( dad != null ) samples.add(dad);

            Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
            if ( mom != null ) samples.add(mom);
        }

        return samples;
    }

    /**
     * @return null if the string is one of the PED missing-value codes ("-9" or "0"), otherwise the string itself
     */
    private final static String maybeMissing(final String string) {
        if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
            return null;
        else
            return string;
    }

    /**
     * Adds a placeholder sample for a parent that is referenced but has no record of its own.
     *
     * @return the newly created sample, or null if id is null or already present in sampleDB
     */
    private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
        if ( id != null && sampleDB.getSample(id) == null ) {
            Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
            sampleDB.addSample(s);
            return s;
        } else
            return null;
    }

    /**
     * Parses a list of tags from the command line, assuming it comes from the GATK Engine
     * tags, and returns the corresponding EnumSet.
     *
     * @param arg the actual engine arg, used for the UserException if there's an error
     * @param tags a list of string tags that should be converted to the MissingPedField value
     * @return the set of missing fields named by tags
     */
    public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
        final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);

        for ( final String tag : tags ) {
            try {
                missingFields.add(MissingPedField.valueOf(tag));
            } catch ( IllegalArgumentException e ) {
                // Arrays.toString is required here: concatenating values() directly would
                // print the array reference (e.g. "[LMissingPedField;@1a2b3c"), not the names
                throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + Arrays.toString(MissingPedField.values()));
            }
        }

        return missingFields;
    }
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
*
*/
public enum PedigreeValidationType {
    /**
     * Require that, if a pedigree file is provided, all samples in the VCF or BAM files
     * have a corresponding entry in the pedigree file(s).
     */
    STRICT,

    /**
     * Do not enforce any overlap between the VCF/BAM samples and the pedigree data
     * */
    SILENT
}

View File

@ -0,0 +1,222 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
/**
*
*/
/**
 * An immutable record of a single sample: its ID, optional pedigree links
 * (family / father / mother), gender, and phenotype (categorical affection
 * status or an arbitrary "other" trait value).
 */
public class Sample implements Comparable<Sample> {
    final private String familyID, paternalID, maternalID;
    final private Gender gender;
    final private String otherPhenotype;
    final private Affection affection;
    final private String ID;
    final private SampleDB infoDB;
    // reserved for non-standard per-sample properties; currently never populated
    final private Map<String, Object> properties = new HashMap<String, Object>();

    /** Sentinel meaning "no quantitative trait value set" */
    public final static String UNSET_QT = null;

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID,
                  final Gender gender, final Affection affection, final String otherPhenotype) {
        this.familyID = familyID;
        this.paternalID = paternalID;
        this.maternalID = maternalID;
        this.gender = gender;
        this.otherPhenotype = otherPhenotype;
        this.affection = affection;
        this.ID = ID;
        this.infoDB = infoDB;
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection, final String otherPhenotype) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID, final Gender gender) {
        this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
        this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
    }

    public Sample(String id, SampleDB infoDB) {
        this(id, infoDB, null, null, null,
                Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
    }

    // -------------------------------------------------------------------------------------
    //
    // standard property getters
    //
    // -------------------------------------------------------------------------------------

    public String getID() {
        return ID;
    }

    public String getFamilyID() {
        return familyID;
    }

    public String getPaternalID() {
        return paternalID;
    }

    public String getMaternalID() {
        return maternalID;
    }

    public Affection getAffection() {
        return affection;
    }

    /** @return true if this sample carries an "other" trait value (see getOtherPhenotype) */
    public boolean hasOtherPhenotype() {
        return affection == Affection.OTHER;
    }

    public String getOtherPhenotype() {
        return otherPhenotype;
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return infoDB.getSample(maternalID);
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return infoDB.getSample(paternalID);
    }

    /**
     * Get gender of the sample
     * @return the gender of this sample (MALE, FEMALE, or UNKNOWN)
     */
    public Gender getGender() {
        return gender;
    }

    @Override
    public int compareTo(final Sample sample) {
        return ID.compareTo(sample.getID());
    }

    @Override
    public String toString() {
        return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
                getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
                getOtherPhenotype(), properties);
    }

    @Override
    public int hashCode() {
        // consistent with equals: equal samples share the same ID
        return ID.hashCode();
    }

    @Override
    public boolean equals(final Object o) {
        if(o == null)
            return false;
        if(o instanceof Sample) {
            Sample otherSample = (Sample)o;
            return ID.equals(otherSample.ID) &&
                    equalOrNull(familyID, otherSample.familyID) &&
                    equalOrNull(paternalID, otherSample.paternalID) &&
                    equalOrNull(maternalID, otherSample.maternalID) &&
                    equalOrNull(gender, otherSample.gender) &&
                    equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
                    equalOrNull(affection, otherSample.affection) &&
                    equalOrNull(properties, otherSample.properties);
        }
        return false;
    }

    /** Null-safe equality: two nulls are equal, a null never equals a non-null. */
    private final static boolean equalOrNull(final Object o1, final Object o2) {
        if ( o1 == null )
            return o2 == null;
        else
            return o2 == null ? false : o1.equals(o2);
    }

    /**
     * Merges a single field from two sample records.  A null / empty value carries no
     * information and yields to the other side; two real values must agree.
     *
     * @param name       sample ID, for error reporting
     * @param field      field name, for error reporting
     * @param o1         value from the first record (may be null)
     * @param o2         value from the second record (may be null)
     * @param emptyValue the field's "no information" sentinel (e.g. Gender.UNKNOWN)
     * @return the merged value
     * @throws UserException if both records carry real, conflicting values
     */
    private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
        if ( o1 == null || o1.equals(emptyValue) ) {
            // o1 carries no information, so o2 (possibly also null) wins
            return o2;
        } else if ( o2 == null || o2.equals(emptyValue) ) {
            return o1; // keep o1, since it's a real value
        } else {
            // both o1 and o2 have real values; they must be equal by value
            // (reference equality here would falsely reject equal but distinct Strings)
            if ( o1.equals(o2) )
                return o1;
            else
                throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
        }
    }

    /**
     * Merges two records describing the same sample, field by field.
     *
     * @return next if the two records are identical, otherwise a new merged Sample
     * @throws UserException if any field holds conflicting real values
     */
    public final static Sample mergeSamples(final Sample prev, final Sample next) {
        if ( prev.equals(next) )
            return next;
        else {
            return new Sample(prev.getID(), prev.infoDB,
                    mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
                    mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
                    mergeValues(prev.getID(), "Maternal_ID", prev.getMaternalID(), next.getMaternalID(), null),
                    mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
                    mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
                    mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
        }
    }
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
/**
*
*/
/**
 * A database of Sample objects keyed by their unique sample ID, with helpers for
 * looking samples up from reads / read groups / genotypes and for navigating
 * pedigree structure (families, children).
 */
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    public SampleDB() {
    }

    /**
     * Protected function to add a single sample to the database.  If a sample with
     * the same ID is already present, the two records are merged (see Sample.mergeSamples).
     *
     * @param sample to be added
     * @return this database, for chaining
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID
     * @param id the sample ID
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     * Look up the sample for a read, via its read group's sample name.
     * @param read the read whose sample to find
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMRecord read) {
        return getSample(read.getReadGroup());
    }

    /**
     * Look up the sample for a read group.
     * @param rg the read group whose sample to find
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * Look up the sample for a genotype, via its sample name.
     * @param g Genotype
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /** @return a fresh set containing all samples in the database */
    public Set<Sample> getSamples() {
        return new HashSet<Sample>(samples.values());
    }

    /** @return an unmodifiable view of all sample IDs */
    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names
     * there will not be any duplicates in the returned set.
     *
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        final HashSet<Sample> samples = new HashSet<Sample>();
        for ( final String name : sampleNameList ) {
            // getSample is a plain map lookup and cannot throw; an unknown name
            // yields null, which (as before) ends up in the returned set
            samples.add(getSample(name));
        }
        return samples;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids)
     * @return the sorted set of family IDs
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids.  Note: the map is rebuilt on every call.
     *
     * @return family ID to family members map, sorted by family ID
     */
    public final Map<String, Set<Sample>> getFamilies() {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();

        for ( final Sample sample : samples.values() ) {
            final String famID = sample.getFamilyID();
            if ( famID != null ) {
                if ( ! families.containsKey(famID) )
                    families.put(famID, new TreeSet<Sample>());
                families.get(famID).add(sample);
            }
        }

        return families;
    }

    /**
     * Return all samples with a given family ID
     * @param familyId the family to look up; may be null
     * @return the members of the family, or null if familyId is null or unknown
     */
    public Set<Sample> getFamily(String familyId) {
        // guard: TreeMap.get(null) would throw NPE, so treat a null id as "no family"
        return familyId == null ? null : getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample.
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     *
     * @param sample the putative parent
     * @return the (possibly empty) set of samples whose mother or father is this sample
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        final Set<Sample> family = getFamily(sample.getFamilyID());
        if ( family == null )
            return children; // no family information -> no identifiable children
        for ( final Sample familyMember : family ) {
            if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
                children.add(familyMember);
            }
        }
        return children;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
*
*/
/**
 * Builder that assembles a {@link SampleDB} from SAM headers, explicit sample names,
 * and pedigree (PED) files/strings, then validates the result according to the
 * configured {@link PedigreeValidationType}.
 */
public class SampleDBBuilder {
    PedigreeValidationType validationStrictness;
    final SampleDB sampleDB = new SampleDB();
    final GenomeAnalysisEngine engine;

    // Samples seen via data sources (SAM headers / name lists) vs. via pedigree
    // inputs are tracked separately so validate() can cross-check the two sets.
    Set<Sample> samplesFromDataSources = new HashSet<Sample>();
    Set<Sample> samplesFromPedigrees = new HashSet<Sample>();

    /** for testing only */
    protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
        engine = null;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     *
     * @param engine engine used to resolve positional tags on pedigree arguments
     * @param validationStrictness how strictly to validate pedigree/data-source consistency
     */
    public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
        this.engine = engine;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them.
     *
     * @param header SAM header whose read groups supply the sample names
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
        addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
        return this;
    }

    /**
     * Creates and registers a fresh Sample for every name not already in the DB.
     *
     * @param sampleNames sample names to add
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
        for (final String sampleName : sampleNames) {
            if (sampleDB.getSample(sampleName) == null) {
                final Sample newSample = new Sample(sampleName, sampleDB);
                sampleDB.addSample(newSample);
                samplesFromDataSources.add(newSample); // keep track of data source samples
            }
        }
        return this;
    }

    /**
     * Parses each pedigree file and merges its samples into the DB.
     *
     * @param pedigreeFiles PED files to parse
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
        for (final File pedFile : pedigreeFiles) {
            Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
            samplesFromPedigrees.addAll(samples);
        }
        return this;
    }

    /**
     * Parses each in-line pedigree string and merges its samples into the DB.
     *
     * @param pedigreeStrings PED-formatted strings to parse
     * @return this builder, for chaining
     */
    public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
        for (final String pedString : pedigreeStrings) {
            Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
            samplesFromPedigrees.addAll(samples);
        }
        return this;
    }

    /**
     * Parse one sample file and integrate it with samples that are already there.
     * Fail quickly if we find any errors in the file.
     *
     * @param sampleFile PED file to parse
     * @return samples parsed from the file
     */
    private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
        final PedReader reader = new PedReader();

        try {
            return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
        } catch ( FileNotFoundException e ) {
            throw new UserException.CouldNotReadInputFile(sampleFile, e);
        }
    }

    /**
     * Parse one in-line pedigree string and integrate it with samples that are already there.
     *
     * @param string PED-formatted string
     * @return samples parsed from the string
     */
    private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
        final PedReader reader = new PedReader();
        return reader.parse(string, getMissingFields(string), sampleDB);
    }

    /**
     * Validates the accumulated samples and returns the finished DB.
     *
     * @return the validated sample DB
     */
    public SampleDB getFinalSampleDB() {
        validate();
        return sampleDB;
    }

    /**
     * Determines which PED fields are declared missing for a given engine argument
     * (via its positional tags). Returns the empty set when no engine is available
     * (testing constructor).
     *
     * @param engineArg the pedigree file or string argument whose tags to inspect
     * @return set of PED fields flagged as missing for this argument
     */
    public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
        if ( engine == null ) // testing only: no engine to supply tags
            return EnumSet.noneOf(PedReader.MissingPedField.class);
        else {
            final List<String> posTags = engine.getTags(engineArg).getPositionalTags();
            return PedReader.parseMissingFieldTags(engineArg, posTags);
        }
    }

    // --------------------------------------------------------------------------------
    //
    // Validation
    //
    // --------------------------------------------------------------------------------

    /**
     * Cross-checks data-source samples against pedigree samples.
     * No-op under SILENT validation; otherwise every sample seen in a data source
     * must also appear in the pedigree inputs (when any pedigree input was given).
     *
     * @throws UserException if a data-source sample is absent from the pedigrees
     */
    protected final void validate() {
        if ( validationStrictness == PedigreeValidationType.SILENT )
            return;

        // check that samples in data sources are all annotated, if anything is annotated
        if ( ! samplesFromPedigrees.isEmpty() && ! samplesFromDataSources.isEmpty() ) {
            final Set<String> sampleNamesFromPedigrees = new HashSet<String>();
            for ( final Sample pSample : samplesFromPedigrees )
                sampleNamesFromPedigrees.add(pSample.getID());

            for ( final Sample dsSample : samplesFromDataSources )
                if ( ! sampleNamesFromPedigrees.contains(dsSample.getID()) )
                    throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
        }
    }
}

View File

@ -364,8 +364,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
// count up the number of skipped reads by summing over all filters // count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L; long nSkippedReads = 0L;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet()) for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
nSkippedReads += countsByFilter.getValue(); nSkippedReads += countsByFilter;
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600)); logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600));
if ( cumulativeMetrics.getNumReadsSeen() > 0 ) if ( cumulativeMetrics.getNumReadsSeen() > 0 )
@ -373,10 +373,10 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
nSkippedReads, nSkippedReads,
cumulativeMetrics.getNumReadsSeen(), cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen()))); 100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen())));
for ( Map.Entry<Class, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) { for ( Map.Entry<String, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
long count = filterCounts.getValue(); long count = filterCounts.getValue();
logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s", logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s",
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey()))); count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), filterCounts.getKey()));
} }
if ( performanceLog != null ) performanceLog.close(); if ( performanceLog != null ) performanceLog.close();

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker; import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl; import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*; import java.util.*;
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
return "dups"; return "dups";
} }
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) { private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>(); ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
l.add(read); l.add(read);
for (SAMRecord read2 : iter) { for (SAMRecord read2 : iter) {
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
iter.pushback(read2); iter.pushback(read2);
break; break;
} else { } else {
l.add(read2); l.add((GATKSAMRecord) read2);
} }
} }
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param reads the list of reads to split into unique molecular samples * @param reads the list of reads to split into unique molecular samples
* @return * @return
*/ */
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) { protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>(); Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
// for each read, find duplicates, and either add the read to its duplicate list or start a new one // for each read, find duplicates, and either add the read to its duplicate list or start a new one
for ( SAMRecord read : reads ) { for ( GATKSAMRecord read : reads ) {
List<SAMRecord> readSet = findDuplicateReads(read, readSets); List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
if ( readSet == null ) { if ( readSet == null ) {
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
} else { } else {
readSet.add(read); readSet.add(read);
} }
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param readSets * @param readSets
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind * @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
*/ */
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) { protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
if ( read.getReadPairedFlag() ) { if ( read.getReadPairedFlag() ) {
// paired // paired
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart()); final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
for (List<SAMRecord> reads : readSets) { for (List<GATKSAMRecord> reads : readSets) {
SAMRecord key = reads.get(0); GATKSAMRecord key = reads.get(0);
// read and key start at the same place, and either the this read and the key // read and key start at the same place, and either the this read and the key
// share a mate location or the read is flagged as a duplicate // share a mate location or the read is flagged as a duplicate
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
} }
} }
} else { } else {
for (List<SAMRecord> reads : readSets) { for (List<GATKSAMRecord> reads : readSets) {
SAMRecord key = reads.get(0); GATKSAMRecord key = reads.get(0);
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength(); boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n", //System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(), // read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
// get the genome loc from the read // get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read); GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter)); Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size())); if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location // Jump forward in the reference to this locus location

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource; import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker; import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/* /*
* Copyright (c) 2009 The Broad Institute * Copyright (c) 2009 The Broad Institute
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// if the read is mapped, create a metadata tracker // if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null; ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, read); final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
if (keepMeP) { if (keepMeP) {
M x = walker.map(refContext, read, tracker); // the tracker can be null M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
sum = walker.reduce(x, sum); sum = walker.reduce(x, sum);
} }

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.reference.ReferenceSequence; import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile; import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory; import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil; import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced; import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation; import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper; import org.broadinstitute.sting.utils.clipreads.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File; import java.io.File;
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
/** /**
* The reads map function. * The reads map function.
* *
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a GATKSAMRecord
* @return the ReadClipper object describing what should be done to clip this read * @return the ReadClipper object describing what should be done to clip this read
*/ */
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) { if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) { if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read); read = ReadUtils.replaceSoftClipsWithMatches(read);
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/ */
private void clipSequences(ReadClipperWithData clipper) { private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) { for (SeqToClip stc : sequencesToClip) {
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param stop * @param stop
* @return * @return
*/ */
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) { private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
if (read.getReadNegativeStrandFlag()) if (read.getReadNegativeStrandFlag())
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1); return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
else else
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/ */
private void clipCycles(ReadClipperWithData clipper) { private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) { if (cyclesToClip != null) {
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param clipper * @param clipper
*/ */
private void clipBadQualityScores(ReadClipperWithData clipper) { private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead(); GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData(); ClippingData data = clipper.getData();
int readLen = read.getReadBases().length; int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities(); byte[] quals = read.getBaseQualities();
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
if ( clipper == null ) if ( clipper == null )
return data; return data;
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation); GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
if (outputBam != null) { if (outputBam != null) {
outputBam.addAlignment(clippedRead); outputBam.addAlignment(clippedRead);
} else { } else {
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
public class ReadClipperWithData extends ReadClipper { public class ReadClipperWithData extends ReadClipper {
private ClippingData data; private ClippingData data;
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) { public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
super(read); super(read);
data = new ClippingData(clipSeqs); data = new ClippingData(clipSeqs);
} }

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter; import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter; import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List; import java.util.List;
import java.util.Set; import java.util.Set;
@ -20,11 +20,11 @@ import java.util.Set;
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) { public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
return true; // We are keeping all the reads return true; // We are keeping all the reads
} }
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ); public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
// Given result of map function // Given result of map function
public abstract ReduceType reduceInit(); public abstract ReduceType reduceInit();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines; import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File; import java.io.File;
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
/** /**
* The reads filter function. * The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return namesToKeep.contains(read.getReadName()); return namesToKeep.contains(read.getReadName());
} }
/** /**
* The reads map function. * The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; return read;
} }

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output; import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream; import java.io.PrintStream;
import java.text.DecimalFormat; import java.text.DecimalFormat;
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
private FlagStat myStat = new FlagStat(); private FlagStat myStat = new FlagStat();
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++; myStat.readCount++;
if (read.getReadFailsVendorQualityCheckFlag()) { if (read.getReadFailsVendorQualityCheckFlag()) {
myStat.QC_failure++; myStat.QC_failure++;

View File

@ -17,7 +17,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
*/ */
@By(DataSource.READS) @By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL) @PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class}) @ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context? // Do we actually want to operate on the context?

View File

@ -34,6 +34,12 @@ public enum PartitionType {
*/ */
NONE, NONE,
/**
* The walker inputs can be chunked down to individual
* reads.
*/
READ,
/** /**
* The walker inputs can be chunked down to the * The walker inputs can be chunked down to the
* per-locus level. * per-locus level.

View File

@ -40,6 +40,7 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file. * Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** /**
* The reads filter function. * The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't * @return true if the read passes the filter, false if it doesn't
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// check the read group // check the read group
if ( readGroup != null ) { if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup(); SAMReadGroupRecord myReadGroup = read.getReadGroup();
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/** /**
* The reads map function. * The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided * @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord * @param read the read itself, as a SAMRecord
* @return the read itself * @return the read itself
*/ */
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) { public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read; return read;
} }

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/** /**
* Created by IntelliJ IDEA. * Created by IntelliJ IDEA.
@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* To change this template use File | Settings | File Templates. * To change this template use File | Settings | File Templates.
*/ */
@Requires({DataSource.READS, DataSource.REFERENCE_BASES}) @Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.CONTIG) @PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> { public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; } public boolean requiresOrderedReads() { return false; }
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
/** Must return true for reads that need to be processed. Reads, for which this method return false will /** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker. * be skipped by the engine and never passed to the walker.
*/ */
public boolean filter(ReferenceContext ref, SAMRecord read) { public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// We are keeping all the reads // We are keeping all the reads
return true; return true;
} }
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext // Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker); public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
} }

View File

@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument; import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker; import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils; import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList; import java.util.ArrayList;
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
logger.info("SplitSamFile version: " + VERSION); logger.info("SplitSamFile version: " + VERSION);
} }
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) { public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read; return read;
} }

View File

@ -25,15 +25,17 @@
package org.broadinstitute.sting.gatk.walkers; package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger; import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK; import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter; import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ; import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair; import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List; import java.util.List;
@ -77,6 +79,23 @@ public abstract class Walker<MapType, ReduceType> {
return toolkit; return toolkit;
} }
/**
* Gets the master sequence dictionary for this walker
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
*/
protected SAMSequenceDictionary getMasterSequenceDictionary() {
return getToolkit().getMasterSequenceDictionary();
}
protected SampleDB getSampleDB() {
return getToolkit().getSampleDB();
}
protected Sample getSample(final String id) {
return getToolkit().getSampleDB().getSample(id);
}
/** /**
* (conceptual static) method that states whether you want to see reads piling up at a locus * (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus. * that contain a deletion at the locus.

View File

@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
continue; continue;
} }
// todo -- actually care about indel length from the pileup (agnostic at the moment) // todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size(); int refCount = indelPileup.getNumberOfElements();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions(); int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) { if ( refCount + altCount == 0 ) {

View File

@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
if (!g.isHet()) if (!g.isHet())
return null; return null;
Set<Allele> altAlleles = vc.getAlternateAlleles(); Collection<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 ) if ( altAlleles.size() == 0 )
return null; return null;

Some files were not shown because too many files have changed in this diff Show More