Merge branch 'master' of ssh://gsa1/humgen/gsa-scr1/gsa-engineering/git/stable

This commit is contained in:
Matt Hanna 2011-11-16 09:57:13 -05:00
commit 6a5d5e7ac9
375 changed files with 13496 additions and 9533 deletions

20
.gitignore vendored 100644
View File

@ -0,0 +1,20 @@
/*.bam
/*.bai
/*.bed
*.idx
*~
/*.vcf
/*.txt
/*.csh
/.*
/*.pdf
/*.eval
*.ipr
*.iws
*.iml
.DS_Store
queueScatterGather
/foo*
/bar*
integrationtests/
public/testdata/onTheFlyOutputTest.vcf

156
build.xml
View File

@ -28,6 +28,8 @@
<property name="build.dir" value="build" />
<property name="dist.dir" value="dist" />
<property name="contract.dump.dir" value="dump" />
<property name="pipelinetest.dir" value="pipelinetests" />
<property name="lib.dir" value="lib" />
<property name="external.dir" value="external" />
<property name="public.dir" value="public" />
@ -35,18 +37,26 @@
<property name="java.public.source.dir" value="${public.dir}/java/src" />
<property name="java.private.source.dir" value="${private.dir}/java/src" />
<property name="java.classes" value="${build.dir}/java/classes" />
<property name="R.public.scripts.dir" value="${public.dir}/R/scripts" />
<property name="R.private.scripts.dir" value="${private.dir}/R/scripts" />
<property name="R.public.src.dir" value="${public.dir}/R/src" />
<!-- Legacy: Installing libraries back into the source directory
instead of the build or dist directory... intentionally avoids ant clean?? -->
<property name="R.library.dir" value="${public.dir}/R" />
<property name="R.tar.dir" value="${build.dir}/R/src" />
<property name="R.package.path" value="org/broadinstitute/sting/utils/R" />
<property name="resource.file" value="StingText.properties" />
<property name="resource.path" value="${java.classes}/StingText.properties" />
<property name="scala.public.source.dir" value="${public.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
<property name="scala.private.source.dir" value="${private.dir}/scala/src" />
<property name="scala.classes" value="${build.dir}/scala/classes" />
<property name="queue-extensions.source.dir" value="${build.dir}/queue-extensions/src" />
<property name="javadoc.dir" value="javadoc" />
<property name="scaladoc.dir" value="scaladoc" />
<!-- Contracts for Java -->
<!-- By default, enabled only for test targets -->
<!-- To disable for test targets, run with -Duse.contracts=false -->
@ -60,7 +70,7 @@
<!-- do we want to halt on failure of a unit test? default to yes (Bamboo uses 'no') -->
<property name="halt" value="yes" />
<!-- should our unit test output go to a file or the screen?
false means it goes to the screen (default) true to file -->
<property name="usefile" value="false" />
@ -82,7 +92,7 @@
<patternset refid="java.source.pattern" />
</fileset>
<!-- terrible hack to get gatkdocs to see all files -->
<!-- terrible hack to get gatkdocs to see all files -->
<patternset id="all.java.source.pattern">
<include name="${java.public.source.dir}/**/*.java" />
<include name="${java.private.source.dir}/**/*.java" />
@ -113,7 +123,7 @@
<exclude name="testng*.jar" />
<exclude name="bcel*.jar" />
</patternset>
<path id="external.dependencies">
<fileset dir="${lib.dir}">
<patternset refid="dependency.mask" />
@ -154,16 +164,18 @@
<property name="ivy.jar.file" value="ivy-${ivy.install.version}.jar"/>
<property name="ivy.settings.dir" value="settings"/>
<property file="${ivy.settings.dir}/ivysettings.properties"/>
<mkdir dir="${lib.dir}"/>
<mkdir dir="${ivy.jar.dir}"/>
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
<property name="init.resolve.done" value="true"/>
</target>
@ -209,11 +221,11 @@
<equals arg1="${git.describe.exit.value}" arg2="0" />
</condition>
</target>
<target name="tagged.build.version" depends="git.describe" if="git.describe.succeeded">
<property name="build.version" value="${git.describe.output}" />
</target>
<target name="git.rev-parse" depends="git.describe" unless="git.describe.succeeded">
<exec executable="git" outputproperty="git.rev-parse.output" resultproperty="git.rev-parse.exit.value" failonerror="false">
<arg line="rev-parse HEAD" />
@ -222,11 +234,11 @@
<equals arg1="${git.rev-parse.exit.value}" arg2="0" />
</condition>
</target>
<target name="untagged.build.version" depends="git.rev-parse" if="git.rev-parse.succeeded">
<property name="build.version" value="${git.rev-parse.output}" />
</target>
<target name="generate.build.version" depends="tagged.build.version, untagged.build.version">
<!-- Set build.version to exported if no other value has been set -->
<property name="build.version" value="exported" />
@ -264,7 +276,7 @@
<echo message="Scala build : ${scala.target}"/>
<echo message="source revision : ${build.version}"/>
<echo message="build time : ${build.timestamp}" />
<condition property="include.private">
<equals arg1="${gatk.target}" arg2="private" casesensitive="false" />
</condition>
@ -310,13 +322,13 @@
<target name="gatk.compile.public.source" depends="init,resolve">
<javac fork="true" srcdir="${java.public.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/>
</javac>
</javac>
</target>
<target name="gatk.compile.private.source" depends="gatk.compile.public.source" if="include.private">
<javac fork="true" srcdir="${java.private.source.dir}" memoryMaximumSize="512m" destdir="${java.classes}" debug="true" debuglevel="lines,vars,source" classpathref="external.dependencies" tempdir="${java.io.tmpdir}">
<compilerarg value="-proc:none"/>
</javac>
</javac>
</target>
<target name="gatk.compile.external.source" depends="gatk.compile.public.source,gatk.compile.private.source">
@ -325,11 +337,11 @@
<property name="dist.dir" value="${external.dist.dir}" />
<property name="gatk.classpath" value="${external.gatk.classpath}" />
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</subant>
</target>
<target name="gatk.compile.source"
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
<target name="gatk.compile.source"
depends="gatk.compile.public.source,gatk.compile.private.source,gatk.compile.external.source"
description="compile the GATK source" />
<target name="gatk.contracts.public" depends="gatk.compile.source" if="include.contracts">
@ -339,9 +351,9 @@
<pathelement path="${java.classes}" />
</classpath>
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
<compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/>
</javac>
</javac>
</target>
<target name="check.contracts.private" depends="gatk.contracts.public">
@ -360,14 +372,14 @@
<pathelement path="${java.classes}" />
</classpath>
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
<compilerarg value="-Acom.google.java.contract.dump=${contract.dump.dir}"/>
<compilerarg value="-proc:only"/>
</javac>
</javac>
</target>
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
<target name="gatk.contracts" depends="gatk.contracts.public,gatk.contracts.private"
description="create GATK contracts" if="include.contracts" />
<target name="gatk.compile" depends="init,resolve,gatk.compile.source,gatk.contracts" />
<target name="init.queue-extensions.generate" depends="gatk.compile">
@ -411,9 +423,9 @@
<src path="${scala.public.source.dir}" />
<src path="${queue-extensions.source.dir}" />
<include name="**/*.scala"/>
</scalac>
</scalac>
</target>
<target name="check.scala.private" depends="scala.compile.public">
<condition property="include.scala.private">
<and>
@ -422,12 +434,12 @@
</and>
</condition>
</target>
<target name="scala.compile.private" depends="check.scala.private" if="include.scala.private">
<scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.classes}" classpathref="scala.dependencies" deprecation="yes" unchecked="yes">
<src path="${scala.private.source.dir}" />
<include name="**/*.scala"/>
</scalac>
</scalac>
</target>
<target name="scala.compile" depends="scala.compile.public,scala.compile.private" if="scala.include" description="compile Scala" />
@ -530,6 +542,11 @@
<target name="sting.compile" depends="gatk.compile, scala.compile" />
<target name="R.public.tar">
<mkdir dir="${R.tar.dir}/${R.package.path}" />
<tar compression="gzip" basedir="${R.public.src.dir}/${R.package.path}" includes="gsalib/**" destfile="${R.tar.dir}/${R.package.path}/gsalib.tar.gz" />
</target>
<target name="init.jar" depends="sting.compile,extracthelp">
<mkdir dir="${dist.dir}"/>
<copy todir="${dist.dir}">
@ -537,7 +554,7 @@
</copy>
</target>
<target name="sting-utils.jar" depends="gatk.compile, init.jar">
<target name="sting-utils.jar" depends="gatk.compile, init.jar, R.public.tar">
<jar jarfile="${dist.dir}/StingUtils.jar">
<fileset dir="${java.classes}">
<include name="**/utils/**/*.class"/>
@ -549,6 +566,15 @@
<fileset dir="${java.classes}" includes="**/sting/jna/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/picard/**/*.class"/>
<fileset dir="${java.classes}" includes="net/sf/samtools/**/*.class"/>
<fileset dir="${R.tar.dir}">
<include name="**/${R.package.path}/**/*.tar.gz"/>
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/utils/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/utils/**/*.R"/>
</fileset>
<manifest>
<attribute name="Premain-Class" value="org.broadinstitute.sting.utils.instrumentation.Sizeof" />
</manifest>
@ -577,6 +603,14 @@
<include name="**/gatk/**/*.class" />
<include name="**/alignment/**/*.class"/>
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/gatk/**/*.R"/>
<include name="**/alignment/**/*.R"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.gatk.CommandLineGATK" />
</manifest>
@ -591,6 +625,14 @@
<include name="**/analyzecovariates/**/*.class" />
<include name="**/gatk/walkers/recalibration/*.class" />
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="**/analyzecovariates/**/*.R"/>
<include name="**/gatk/walkers/recalibration/**/*.R"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.analyzecovariates.AnalyzeCovariates" />
</manifest>
@ -603,28 +645,7 @@
<fileset dir="${external.dir}" includes="*/build.xml" erroronmissingdir="false" />
</subant>
</target>
<!--
<target name="gatk.oneoffs.jar" depends="gatk.compile, init.jar"
description="generate the GATK oneoffs distribution" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.oneoffprojects.tools.CompareBAMAlignments" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" whenmanifestonly="skip">
<fileset dir="${java.classes}">
<include name="**/tools/**/*.class" />
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.playground.tools.SliceBams" />
</manifest>
</jar>
</target>
-->
<target name="scala.jar" depends="scala.compile, init.jar" if="scala.include">
<jar jarfile="${dist.dir}/GATKScala.jar">
<fileset dir="${scala.classes}">
@ -641,6 +662,12 @@
<fileset dir="${java.classes}">
<include name="org/broadinstitute/sting/queue/**/*.class" />
</fileset>
<fileset dir="${R.public.scripts.dir}">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<fileset dir="${R.private.scripts.dir}" erroronmissingdir="false">
<include name="org/broadinstitute/sting/queue/**/*.R"/>
</fileset>
<manifest>
<attribute name="Main-Class" value="org.broadinstitute.sting.queue.QCommandLine" />
</manifest>
@ -680,20 +707,7 @@
</jar>
</target>
<!--
<target name="gatk.oneoffs.manifests" depends="gatk.oneoffs.jar, init.manifests" if="include.oneoffs">
<jar jarfile="${dist.dir}/CompareBAMAlignments.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
<jar jarfile="${dist.dir}/SliceBams.jar" update="true" whenmanifestonly="skip">
<manifest>
<attribute name="Class-Path" value="${jar.classpath}" />
</manifest>
</jar>
</target>
-->
<target name="queue.manifests" depends="queue.jar, init.manifests" if="scala.include">
<jar jarfile="${dist.dir}/Queue.jar" update="true" >
<manifest>
@ -778,10 +792,6 @@
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
@ -798,10 +808,6 @@
<pathelement location="${testng.jar}"/>
</classpath>
<compilerarg value="-proc:none"/>
<!--
<compilerarg value="-Acom.google.java.contract.debug"/>
<compilerarg value="-Acom.google.java.contract.dump=dump/"/>
-->
</javac>
</target>
@ -849,6 +855,9 @@
<pathelement location="${java.private.test.classes}" />
<pathelement location="${scala.public.test.classes}" />
<pathelement location="${scala.private.test.classes}" />
<pathelement location="${R.tar.dir}" />
<pathelement location="${R.public.scripts.dir}" />
<pathelement location="${R.private.scripts.dir}" />
</path>
<path id="testng.gatk.releasetest.classpath">
@ -1185,19 +1194,18 @@
</target>
<target name="clean" description="clean up" depends="clean.javadoc,clean.scaladoc,clean.gatkdocs">
<delete dir="out"/>
<delete dir="${build.dir}"/>
<delete dir="${lib.dir}"/>
<delete dir="dump"/>
<delete dir="${contract.dump.dir}"/>
<delete dir="${staging.dir}"/>
<delete dir="${dist.dir}"/>
<delete dir="pipelinetests"/>
<delete dir="${pipelinetest.dir}"/>
</target>
<!-- Build gsalib R module -->
<target name="gsalib">
<exec executable="R" failonerror="true">
<arg line="R CMD INSTALL -l public/R/ public/R/src/gsalib/" />
<arg line="R CMD INSTALL -l ${R.library.dir} ${R.public.src.dir}/${R.package.path}/gsalib" />
</exec>
</target>
</project>

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE)
verbose = TRUE
@ -47,6 +49,9 @@ if( is.numeric(c$Covariate) ) {
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
#
# Plot mean quality versus the covariate
@ -69,6 +74,10 @@ if( is.numeric(c$Covariate) ) {
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
#
# Plot histogram of the covariate
#
@ -106,3 +115,7 @@ if( is.numeric(c$Covariate) ) {
axis(2,axTicks(2), format(axTicks(2), scientific=F))
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE)
input = args[1]
@ -33,6 +35,10 @@ points(f$Qreported, f$Qempirical, type="p", col="maroon1", pch=16)
abline(0,1, lty=2)
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
#
# Plot Q empirical histogram
#
@ -52,6 +58,10 @@ points(hst2$f.Qempirical, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}
#
# Plot Q reported histogram
#
@ -68,3 +78,7 @@ plot(hst$e.Qreported, hst$e.nBases, type="h", lwd=4, xlim=c(0,maxQ), ylim=c(0,yM
points(hst2$f.Qreported, hst2$f.nBases, type="h", lwd=4, col="maroon1")
axis(2,axTicks(2), format(axTicks(2), scientific=F))
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -1,5 +1,7 @@
#!/bin/env Rscript
library(tools)
args <- commandArgs(TRUE)
verbose = TRUE
@ -85,3 +87,7 @@ if ( ! is.null(sensitivity) ) {
}
dev.off()
if (exists('compactPDF')) {
compactPDF(outfile)
}

View File

@ -12,20 +12,20 @@ if ( onCMDLine ) {
inputFileName = args[1]
outputPDF = args[2]
} else {
#inputFileName = "~/Desktop/broadLocal/GATK/unstable/report.txt"
inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
inputFileName = "~/Desktop/broadLocal/GATK/unstable/wgs.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/Q-25718@node1149.jobreport.txt"
#inputFileName = "/humgen/gsa-hpprojects/dev/depristo/oneOffProjects/rodPerformanceGoals/history/report.082711.txt"
outputPDF = NA
}
RUNTIME_UNITS = "(sec)"
ORIGINAL_UNITS_TO_SECONDS = 1/1000
RUNTIME_UNITS = "(hours)"
ORIGINAL_UNITS_TO_RUNTIME_UNITS = 1/1000/60/60
#
# Helper function to aggregate all of the jobs in the report across all tables
#
allJobsFromReport <- function(report) {
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts")
names <- c("jobName", "startTime", "analysisName", "doneTime", "exechosts", "runtime")
sub <- lapply(report, function(table) table[,names])
do.call("rbind", sub)
}
@ -33,7 +33,7 @@ allJobsFromReport <- function(report) {
#
# Creates segmentation plots of time (x) vs. job (y) with segments for the duration of the job
#
plotJobsGantt <- function(gatkReport, sortOverall) {
plotJobsGantt <- function(gatkReport, sortOverall, includeText) {
allJobs = allJobsFromReport(gatkReport)
if ( sortOverall ) {
title = "All jobs, by analysis, by start time"
@ -44,16 +44,18 @@ plotJobsGantt <- function(gatkReport, sortOverall) {
}
allJobs$index = 1:nrow(allJobs)
minTime = min(allJobs$startTime)
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relStartTime = allJobs$startTime - minTime
allJobs$relDoneTime = allJobs$doneTime - minTime
allJobs$ganttName = paste(allJobs$jobName, "@", allJobs$exechosts)
maxRelTime = max(allJobs$relDoneTime)
p <- ggplot(data=allJobs, aes(x=relStartTime, y=index, color=analysisName))
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=2, arrow=arrow(length = unit(0.1, "cm")))
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + theme_bw()
p <- p + geom_segment(aes(xend=relDoneTime, yend=index), size=1, arrow=arrow(length = unit(0.1, "cm")))
if ( includeText )
p <- p + geom_text(aes(x=relDoneTime, label=ganttName, hjust=-0.2), size=2)
p <- p + xlim(0, maxRelTime * 1.1)
p <- p + xlab(paste("Start time (relative to first job)", RUNTIME_UNITS))
p <- p + ylab("Job")
p <- p + xlab(paste("Start time, relative to first job", RUNTIME_UNITS))
p <- p + ylab("Job number")
p <- p + opts(title=title)
print(p)
}
@ -119,7 +121,7 @@ plotGroup <- function(groupTable) {
if ( length(groupAnnotations) == 1 && dim(sub)[1] > 1 ) {
# todo -- how do we group by annotations?
p <- ggplot(data=sub, aes(x=runtime)) + geom_histogram()
p <- p + xlab("runtime in seconds") + ylab("No. of jobs")
p <- p + xlab(paste("runtime", RUNTIME_UNITS)) + ylab("No. of jobs")
p <- p + opts(title=paste("Job runtime histogram for", name))
print(p)
}
@ -139,9 +141,9 @@ print(paste("Project :", inputFileName))
convertUnits <- function(gatkReportData) {
convertGroup <- function(g) {
g$runtime = g$runtime * ORIGINAL_UNITS_TO_SECONDS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_SECONDS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_SECONDS
g$runtime = g$runtime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$startTime = g$startTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g$doneTime = g$doneTime * ORIGINAL_UNITS_TO_RUNTIME_UNITS
g
}
lapply(gatkReportData, convertGroup)
@ -157,8 +159,8 @@ if ( ! is.na(outputPDF) ) {
pdf(outputPDF, height=8.5, width=11)
}
plotJobsGantt(gatkReportData, T)
plotJobsGantt(gatkReportData, F)
plotJobsGantt(gatkReportData, T, F)
plotJobsGantt(gatkReportData, F, F)
plotProgressByTime(gatkReportData)
for ( group in gatkReportData ) {
plotGroup(group)

View File

@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
gatkreport = as.list(tableEnv);
gatkreport = as.list(tableEnv, all.names=TRUE);
}

View File

Before

Width:  |  Height:  |  Size: 49 KiB

After

Width:  |  Height:  |  Size: 49 KiB

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -35,6 +34,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Iterator;
@ -72,12 +72,13 @@ public class AlignmentValidationWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of reads aligned by this map (aka 1).
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
//logger.info(String.format("examining read %s", read.getReadName()));
byte[] bases = read.getReadBases();

View File

@ -39,6 +39,7 @@ import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.WalkerName;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.File;
@ -92,12 +93,13 @@ public class AlignmentWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of alignments found for this read.
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
SAMRecord alignedRead = aligner.align(read,header);
out.addAlignment(alignedRead);
return 1;

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.alignment;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.alignment.bwa.BWAConfiguration;
import org.broadinstitute.sting.alignment.bwa.BWTFiles;
import org.broadinstitute.sting.alignment.bwa.c.BWACAligner;
@ -34,6 +33,7 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.util.Iterator;
@ -79,12 +79,13 @@ public class CountBestAlignmentsWalker extends ReadWalker<Integer,Integer> {
/**
* Aligns a read to the given reference.
*
* @param ref Reference over the read. Read will most likely be unmapped, so ref will be null.
* @param read Read to align.
* @return Number of alignments found for this read.
*/
@Override
public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public Integer map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
Iterator<Alignment[]> alignmentIterator = aligner.getAllAlignments(read.getReadBases()).iterator();
if(alignmentIterator.hasNext()) {
int numAlignments = alignmentIterator.next().length;

View File

@ -25,6 +25,9 @@
package org.broadinstitute.sting.analyzecovariates;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.CommandLineProgram;
@ -33,14 +36,16 @@ import org.broadinstitute.sting.gatk.walkers.recalibration.Covariate;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalDatum;
import org.broadinstitute.sting.gatk.walkers.recalibration.RecalibrationArgumentCollection;
import org.broadinstitute.sting.utils.R.RScriptExecutor;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.io.Resource;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map;
import java.util.regex.Pattern;
@ -71,15 +76,13 @@ import java.util.regex.Pattern;
* </ul>
*
* <p>
* NOTE: For those running this tool externally from the Broad, it is crucial to note that both the -Rscript and -resources options
* must be changed from the default. -Rscript needs to point to your installation of Rscript (this is the scripting version of R,
* not the interactive version) while -resources needs to point to the folder holding the R scripts that are used. For those using
* this tool as part of the Binary Distribution the -resources should point to the resources folder that is part of the tarball.
* For those using this tool by building from the git repository the -resources should point to the R/ subdirectory of the Sting checkout.
* NOTE: Rscript needs to be in your environment PATH (this is the scripting version of R, not the interactive version).
* See <a target="r-project" href="http://www.r-project.org">http://www.r-project.org</a> for more info on how to download and install R.
*
* <p>
* See the GATK wiki for a tutorial and example recalibration accuracy plots.
* http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration
* <a target="gatkwiki" href="http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration"
* >http://www.broadinstitute.org/gsa/wiki/index.php/Base_quality_score_recalibration</a>
*
* <h2>Input</h2>
* <p>
@ -91,7 +94,6 @@ import java.util.regex.Pattern;
* java -Xmx4g -jar AnalyzeCovariates.jar \
* -recalFile /path/to/recal.table.csv \
* -outputDir /path/to/output_dir/ \
* -resources resources/ \
* -ignoreQ 5
* </pre>
*
@ -101,6 +103,11 @@ import java.util.regex.Pattern;
groupName = "AnalyzeCovariates",
summary = "Package to plot residual accuracy versus error covariates for the base quality score recalibrator")
public class AnalyzeCovariates extends CommandLineProgram {
final private static Logger logger = Logger.getLogger(AnalyzeCovariates.class);
private static final String PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE = "plot_residualError_QualityScoreCovariate.R";
private static final String PLOT_RESDIUAL_ERROR_OTHER_COVARIATE = "plot_residualError_OtherCovariate.R";
private static final String PLOT_INDEL_QUALITY_RSCRIPT = "plot_indelQuality.R";
/////////////////////////////
// Command Line Arguments
@ -114,11 +121,7 @@ public class AnalyzeCovariates extends CommandLineProgram {
@Input(fullName = "recal_file", shortName = "recalFile", doc = "The input recal csv file to analyze", required = false)
private String RECAL_FILE = "output.recal_data.csv";
@Argument(fullName = "output_dir", shortName = "outputDir", doc = "The directory in which to output all the plots and intermediate data files", required = false)
private String OUTPUT_DIR = "analyzeCovariates/";
@Argument(fullName = "path_to_Rscript", shortName = "Rscript", doc = "The path to your implementation of Rscript. For Broad users this is maybe /broad/software/free/Linux/redhat_5_x86_64/pkgs/r_2.12.0/bin/Rscript", required = false)
private String PATH_TO_RSCRIPT = "Rscript";
@Argument(fullName = "path_to_resources", shortName = "resources", doc = "Path to resources folder holding the Sting R scripts.", required = false)
private String PATH_TO_RESOURCES = "public/R/";
private File OUTPUT_DIR = new File("analyzeCovariates");
@Argument(fullName = "ignoreQ", shortName = "ignoreQ", doc = "Ignore bases with reported quality less than this number.", required = false)
private int IGNORE_QSCORES_LESS_THAN = 5;
@Argument(fullName = "numRG", shortName = "numRG", doc = "Only process N read groups. Default value: -1 (process all read groups)", required = false)
@ -154,29 +157,26 @@ public class AnalyzeCovariates extends CommandLineProgram {
protected int execute() {
// create the output directory where all the data tables and plots will go
try {
Process p = Runtime.getRuntime().exec("mkdir " + OUTPUT_DIR);
} catch (IOException e) {
System.out.println("Couldn't create directory: " + OUTPUT_DIR);
System.out.println("User is responsible for making sure the output directory exists.");
}
if( !OUTPUT_DIR.endsWith("/") ) { OUTPUT_DIR = OUTPUT_DIR + "/"; }
if( !PATH_TO_RESOURCES.endsWith("/") ) { PATH_TO_RESOURCES = PATH_TO_RESOURCES + "/"; }
if (!OUTPUT_DIR.exists() && !OUTPUT_DIR.mkdirs())
throw new UserException.BadArgumentValue("--output_dir/-outDir", "Unable to create output directory: " + OUTPUT_DIR);
if (!RScriptExecutor.RSCRIPT_EXISTS)
Utils.warnUser(logger, "Rscript not found in environment path. Plots will not be generated.");
// initialize all the data from the csv file and allocate the list of covariates
System.out.println("Reading in input csv file...");
logger.info("Reading in input csv file...");
initializeData();
System.out.println("...Done!");
logger.info("...Done!");
// output data tables for Rscript to read in
System.out.println("Writing out intermediate tables for R...");
logger.info("Writing out intermediate tables for R...");
writeDataTables();
System.out.println("...Done!");
logger.info("...Done!");
// perform the analysis using Rscript and output the plots
System.out.println("Calling analysis R scripts and writing out figures...");
logger.info("Calling analysis R scripts and writing out figures...");
callRScripts();
System.out.println("...Done!");
logger.info("...Done!");
return 0;
}
@ -287,37 +287,40 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(NUM_READ_GROUPS_TO_PROCESS == -1 || ++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS) {
String readGroup = readGroupKey.toString();
RecalDatum readGroupDatum = (RecalDatum) dataManager.getCollapsedTable(0).data.get(readGroupKey);
System.out.print("Writing out data tables for read group: " + readGroup + "\twith " + readGroupDatum.getNumObservations() + " observations" );
System.out.println("\tand aggregate residual error = " + String.format("%.3f", readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
logger.info(String.format(
"Writing out data tables for read group: %s\twith %s observations\tand aggregate residual error = %.3f",
readGroup, readGroupDatum.getNumObservations(),
readGroupDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE) - readGroupDatum.getEstimatedQReported()));
// for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii);
// Create a PrintStream
PrintStream output = null;
File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
PrintStream output;
try {
output = new PrintStream(new FileOutputStream(OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat"));
} catch (FileNotFoundException e) {
System.err.println("Can't create file: " + OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
System.exit(-1);
output = new PrintStream(FileUtils.openOutputStream(outputFile));
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile(outputFile, e);
}
// Output the header
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
try {
// Output the header
output.println("Covariate\tQreported\tQempirical\tnMismatches\tnBases");
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
output.print( covariateKey.toString() + "\t" ); // Covariate
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
output.println( thisDatum.getNumObservations() ); // nBases
for( Object covariateKey : ((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).keySet()) {
output.print( covariateKey.toString() + "\t" ); // Covariate
RecalDatum thisDatum = (RecalDatum)((Map)dataManager.getCollapsedTable(iii).data.get(readGroupKey)).get(covariateKey);
output.print( String.format("%.3f", thisDatum.getEstimatedQReported()) + "\t" ); // Qreported
output.print( String.format("%.3f", thisDatum.empiricalQualDouble(0, MAX_QUALITY_SCORE)) + "\t" ); // Qempirical
output.print( thisDatum.getNumMismatches() + "\t" ); // nMismatches
output.println( thisDatum.getNumObservations() ); // nBases
}
} finally {
// Close the PrintStream
IOUtils.closeQuietly(output);
}
// Close the PrintStream
output.close();
}
} else {
break;
@ -327,10 +330,6 @@ public class AnalyzeCovariates extends CommandLineProgram {
}
private void callRScripts() {
RScriptExecutor.RScriptArgumentCollection argumentCollection =
new RScriptExecutor.RScriptArgumentCollection(PATH_TO_RSCRIPT, Arrays.asList(PATH_TO_RESOURCES));
RScriptExecutor executor = new RScriptExecutor(argumentCollection, true);
int numReadGroups = 0;
// for each read group
@ -338,23 +337,32 @@ public class AnalyzeCovariates extends CommandLineProgram {
if(++numReadGroups <= NUM_READ_GROUPS_TO_PROCESS || NUM_READ_GROUPS_TO_PROCESS == -1) {
String readGroup = readGroupKey.toString();
System.out.println("Analyzing read group: " + readGroup);
logger.info("Analyzing read group: " + readGroup);
// for each covariate
for( int iii = 1; iii < requestedCovariates.size(); iii++ ) {
Covariate cov = requestedCovariates.get(iii);
final String outputFilename = OUTPUT_DIR + readGroup + "." + cov.getClass().getSimpleName()+ ".dat";
final File outputFile = new File(OUTPUT_DIR, readGroup + "." + cov.getClass().getSimpleName()+ ".dat");
if (DO_INDEL_QUALITY) {
executor.callRScripts("plot_indelQuality.R", outputFilename,
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(PLOT_INDEL_QUALITY_RSCRIPT, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
} else {
if( iii == 1 ) {
// Analyze reported quality
executor.callRScripts("plot_residualError_QualityScoreCovariate.R", outputFilename,
IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE); // The third argument is the Q scores that should be turned pink in the plot because they were ignored
RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_QUALITY_SCORE_COVARIATE, AnalyzeCovariates.class));
// The second argument is the Q scores that should be turned pink in the plot because they were ignored
executor.addArgs(outputFile, IGNORE_QSCORES_LESS_THAN, MAX_QUALITY_SCORE, MAX_HISTOGRAM_VALUE);
executor.exec();
} else { // Analyze all other covariates
executor.callRScripts("plot_residualError_OtherCovariate.R", outputFilename,
cov.getClass().getSimpleName().split("Covariate")[0]); // The third argument is the name of the covariate in order to make the plots look nice
RScriptExecutor executor = new RScriptExecutor();
executor.addScript(new Resource(PLOT_RESDIUAL_ERROR_OTHER_COVARIATE, AnalyzeCovariates.class));
// The second argument is the name of the covariate in order to make the plots look nice
executor.addArgs(outputFile, cov.getClass().getSimpleName().split("Covariate")[0]);
executor.exec();
}
}
}

View File

@ -46,7 +46,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
/**
* Maps indices of command line arguments to values paired with that argument.
*/
public final SortedMap<Integer,List<String>> indices = new TreeMap<Integer,List<String>>();
public final SortedMap<ArgumentMatchSite,List<String>> sites = new TreeMap<ArgumentMatchSite,List<String>>();
/**
* An ordered, freeform collection of tags.
@ -72,32 +72,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
}
/**
* A simple way of indicating that an argument with the given label and definition exists at this index.
* A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null.
* @param site Position of the argument. Must not be null.
* @param tags ordered freeform text tags associated with this argument.
*/
public ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final Tags tags) {
this( label, definition, index, null, tags );
public ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final Tags tags) {
this( label, definition, site, null, tags );
}
/**
* A simple way of indicating that an argument with the given label and definition exists at this index.
* A simple way of indicating that an argument with the given label and definition exists at this site.
* @param label Label of the argument match. Must not be null.
* @param definition The associated definition, if one exists. May be null.
* @param index Position of the argument. Must not be null.
* @param site Position of the argument. Must not be null.
* @param value Value for the argument at this position.
* @param tags ordered freeform text tags associated with this argument.
*/
private ArgumentMatch(final String label, final ArgumentDefinition definition, final int index, final String value, final Tags tags) {
private ArgumentMatch(final String label, final ArgumentDefinition definition, final ArgumentMatchSite site, final String value, final Tags tags) {
this.label = label;
this.definition = definition;
ArrayList<String> values = new ArrayList<String>();
if( value != null )
values.add(value);
indices.put(index,values );
sites.put(site,values );
this.tags = tags;
}
@ -117,7 +117,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
ArgumentMatch otherArgumentMatch = (ArgumentMatch)other;
return this.definition.equals(otherArgumentMatch.definition) &&
this.label.equals(otherArgumentMatch.label) &&
this.indices.equals(otherArgumentMatch.indices) &&
this.sites.equals(otherArgumentMatch.sites) &&
this.tags.equals(otherArgumentMatch.tags);
}
@ -129,16 +129,17 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param key Key which specifies the transform.
* @return A variant of this ArgumentMatch with all keys transformed.
*/
@SuppressWarnings("unchecked")
ArgumentMatch transform(Multiplexer multiplexer, Object key) {
SortedMap<Integer,List<String>> newIndices = new TreeMap<Integer,List<String>>();
for(Map.Entry<Integer,List<String>> index: indices.entrySet()) {
SortedMap<ArgumentMatchSite,List<String>> newIndices = new TreeMap<ArgumentMatchSite,List<String>>();
for(Map.Entry<ArgumentMatchSite,List<String>> site: sites.entrySet()) {
List<String> newEntries = new ArrayList<String>();
for(String entry: index.getValue())
for(String entry: site.getValue())
newEntries.add(multiplexer.transformArgument(key,entry));
newIndices.put(index.getKey(),newEntries);
newIndices.put(site.getKey(),newEntries);
}
ArgumentMatch newArgumentMatch = new ArgumentMatch(label,definition);
newArgumentMatch.indices.putAll(newIndices);
newArgumentMatch.sites.putAll(newIndices);
return newArgumentMatch;
}
@ -157,9 +158,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
public Iterator<ArgumentMatch> iterator() {
return new Iterator<ArgumentMatch>() {
/**
* Iterate over each the available index.
* Iterate over each available site.
*/
private Iterator<Integer> indexIterator = null;
private Iterator<ArgumentMatchSite> siteIterator = null;
/**
* Iterate over each available token.
@ -167,9 +168,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
private Iterator<String> tokenIterator = null;
/**
* The next index to return. Null if none remain.
* The next site to return. Null if none remain.
*/
Integer nextIndex = null;
ArgumentMatchSite nextSite = null;
/**
* The next token to return. Null if none remain.
@ -177,7 +178,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
String nextToken = null;
{
indexIterator = indices.keySet().iterator();
siteIterator = sites.keySet().iterator();
prepareNext();
}
@ -186,7 +187,7 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return True if there's another token waiting in the wings. False otherwise.
*/
public boolean hasNext() {
return nextToken != null;
return nextToken != null;
}
/**
@ -194,32 +195,32 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @return The next ArgumentMatch in the series. Should never be null.
*/
public ArgumentMatch next() {
if( nextIndex == null || nextToken == null )
if( nextSite == null || nextToken == null )
throw new IllegalStateException( "No more ArgumentMatches are available" );
ArgumentMatch match = new ArgumentMatch( label, definition, nextIndex, nextToken, tags );
ArgumentMatch match = new ArgumentMatch( label, definition, nextSite, nextToken, tags );
prepareNext();
return match;
}
/**
* Initialize the next ArgumentMatch to return. If no ArgumentMatches are available,
* initialize nextIndex / nextToken to null.
* initialize nextSite / nextToken to null.
*/
private void prepareNext() {
if( tokenIterator != null && tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
}
else {
nextIndex = null;
nextSite = null;
nextToken = null;
// Do a nested loop. While more data is present in the inner loop, grab that data.
// Otherwise, troll the outer iterator looking for more data.
while( indexIterator.hasNext() ) {
nextIndex = indexIterator.next();
if( indices.get(nextIndex) != null ) {
tokenIterator = indices.get(nextIndex).iterator();
while( siteIterator.hasNext() ) {
nextSite = siteIterator.next();
if( sites.get(nextSite) != null ) {
tokenIterator = sites.get(nextSite).iterator();
if( tokenIterator.hasNext() ) {
nextToken = tokenIterator.next();
break;
@ -245,29 +246,29 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
* @param other The other match to merge into.
*/
public void mergeInto( ArgumentMatch other ) {
indices.putAll(other.indices);
sites.putAll(other.sites);
}
/**
* Associate a value with this merge mapping.
* @param index index of the command-line argument to which this value is mated.
* @param site site of the command-line argument to which this value is mated.
* @param value Text representation of value to add.
*/
public void addValue( int index, String value ) {
if( !indices.containsKey(index) || indices.get(index) == null )
indices.put(index, new ArrayList<String>() );
indices.get(index).add(value);
public void addValue( ArgumentMatchSite site, String value ) {
if( !sites.containsKey(site) || sites.get(site) == null )
sites.put(site, new ArrayList<String>() );
sites.get(site).add(value);
}
/**
* Does this argument already have a value at the given site?
* Arguments are only allowed to be single-valued per site, and
* flags aren't allowed a value at all.
* @param index Index at which to check for values.
* @param site Site at which to check for values.
* @return True if the argument has a value at the given site. False otherwise.
*/
public boolean hasValueAtSite( int index ) {
return (indices.get(index) != null && indices.get(index).size() >= 1) || isArgumentFlag();
public boolean hasValueAtSite( ArgumentMatchSite site ) {
return (sites.get(site) != null && sites.get(site).size() >= 1) || isArgumentFlag();
}
/**
@ -276,9 +277,9 @@ public class ArgumentMatch implements Iterable<ArgumentMatch> {
*/
public List<String> values() {
List<String> values = new ArrayList<String>();
for( int index: indices.keySet() ) {
if( indices.get(index) != null )
values.addAll(indices.get(index));
for( ArgumentMatchSite site: sites.keySet() ) {
if( sites.get(site) != null )
values.addAll(sites.get(site));
}
return values;
}

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Which source and the index within the source where an argument match was found.
 *
 * Immutable value object. Sites are ordered by source first (command line
 * before files, per ArgumentMatchSource), then by index within the source.
 */
public class ArgumentMatchSite implements Comparable<ArgumentMatchSite> {
    /** Where the argument was found: the command line or an argument file. */
    private final ArgumentMatchSource source;

    /** Position of the argument within its source. */
    private final int index;

    /**
     * @param source Source in which the argument was found.
     * @param index Position of the argument within that source.
     */
    public ArgumentMatchSite(ArgumentMatchSource source, int index) {
        this.source = source;
        this.index = index;
    }

    public ArgumentMatchSource getSource() {
        return source;
    }

    public int getIndex() {
        return index;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        ArgumentMatchSite that = (ArgumentMatchSite) o;
        return (index == that.index) && (source == null ? that.source == null : source.equals(that.source));
    }

    @Override
    public int hashCode() {
        int result = source != null ? source.hashCode() : 0;
        // Generated by intellij. No other special reason to this implementation. -ks
        result = 31 * result + index;
        return result;
    }

    /**
     * Orders sites by source, then by index within the source.
     *
     * Null-safe on the source: equals() permits a null source, so compareTo()
     * must not throw on one to keep the ordering consistent with equals.
     * A null source sorts before any non-null source.
     */
    @Override
    public int compareTo(ArgumentMatchSite that) {
        int comp;
        if (this.source == null || that.source == null)
            comp = (this.source == null ? 0 : 1) - (that.source == null ? 0 : 1);
        else
            comp = this.source.compareTo(that.source);
        if (comp != 0)
            return comp;

        // Both sources are the same; break the tie on the index.
        if (this.index == that.index)
            return 0;
        return this.index < that.index ? -1 : 1;
    }
}

View File

@ -0,0 +1,98 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import java.io.File;
/**
 * Where an argument match originated, via the commandline or a file.
 *
 * Immutable. The well-known COMMAND_LINE instance represents arguments typed
 * directly on the command line; all other instances wrap an argument file.
 */
public class ArgumentMatchSource implements Comparable<ArgumentMatchSource> {
    /** The single source representing the command line itself (no backing file). */
    public static final ArgumentMatchSource COMMAND_LINE = new ArgumentMatchSource(ArgumentMatchSourceType.CommandLine, null);

    private final ArgumentMatchSourceType type;
    private final File file;

    /**
     * Creates an argument match source from the specified file.
     * @param file File specifying the arguments. Must not be null.
     */
    public ArgumentMatchSource(File file) {
        this(ArgumentMatchSourceType.File, file);
    }

    private ArgumentMatchSource(ArgumentMatchSourceType type, File file) {
        // A file-typed source without a file would make compareTo/equals meaningless.
        if (type == ArgumentMatchSourceType.File && file == null)
            throw new IllegalArgumentException("An argument match source of type File cannot have a null file.");
        this.type = type;
        this.file = file;
    }

    public ArgumentMatchSourceType getType() {
        return type;
    }

    public File getFile() {
        return file;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        if (o == null || getClass() != o.getClass())
            return false;

        ArgumentMatchSource other = (ArgumentMatchSource) o;
        if (type != other.type)
            return false;
        if (file == null)
            return other.file == null;
        return file.equals(other.file);
    }

    @Override
    public int hashCode() {
        int hash = (type == null) ? 0 : type.hashCode();
        hash = 31 * hash + ((file == null) ? 0 : file.hashCode());
        return hash;
    }

    /**
     * Compares two sources, putting the command line first, then files.
     * Within the same type, a null file sorts before any non-null file.
     */
    @Override
    public int compareTo(ArgumentMatchSource that) {
        int byType = this.type.compareTo(that.type);
        if (byType != 0)
            return byType;

        // Same type: order by backing file, null file first.
        if (this.file == null && that.file == null)
            return 0;
        if (this.file == null)
            return -1;
        if (that.file == null)
            return 1;
        return this.file.compareTo(that.file);
    }
}

View File

@ -0,0 +1,32 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
/**
 * Type of where an argument match originated, via the commandline or a file.
 *
 * Used by ArgumentMatchSource to distinguish arguments passed directly on the
 * command line from arguments read out of an argument file.
 */
public enum ArgumentMatchSourceType {
    // CommandLine: the argument was typed directly on the command line.
    // File: the argument was read from an argument file.
    CommandLine, File
}

View File

@ -37,7 +37,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* Collection matches from argument definition to argument value.
* Package protected access is deliberate.
*/
Map<Integer,ArgumentMatch> argumentMatches = new TreeMap<Integer,ArgumentMatch>();
Map<ArgumentMatchSite,ArgumentMatch> argumentMatches = new TreeMap<ArgumentMatchSite,ArgumentMatch>();
/**
* Provide a place to put command-line argument values that don't seem to belong to
@ -80,7 +80,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @param site Site at which to check.
* @return True if the site has a match. False otherwise.
*/
boolean hasMatch( int site ) {
boolean hasMatch( ArgumentMatchSite site ) {
return argumentMatches.containsKey( site );
}
@ -90,7 +90,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* @return The match present at the given site.
* @throws IllegalArgumentException if site does not contain a match.
*/
ArgumentMatch getMatch( int site ) {
ArgumentMatch getMatch( ArgumentMatchSite site ) {
if( !argumentMatches.containsKey(site) )
throw new IllegalArgumentException( "Site does not contain an argument: " + site );
return argumentMatches.get(site);
@ -107,6 +107,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
/**
* Return all argument matches of this source.
* @param parsingEngine Parsing engine.
* @param argumentSource Argument source to match.
* @return List of all matches.
*/
@ -167,6 +168,7 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
* TODO: Generify this.
* @param multiplexer Multiplexer that controls the transformation process.
* @param key Key which specifies the transform.
* @return new argument matches.
*/
ArgumentMatches transform(Multiplexer multiplexer, Object key) {
ArgumentMatches newArgumentMatches = new ArgumentMatches();
@ -187,15 +189,15 @@ public class ArgumentMatches implements Iterable<ArgumentMatch> {
for( ArgumentMatch argumentMatch: getUniqueMatches() ) {
if( argumentMatch.definition == match.definition && argumentMatch.tags.equals(match.tags) ) {
argumentMatch.mergeInto( match );
for( int index: match.indices.keySet() )
argumentMatches.put( index, argumentMatch );
for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( site, argumentMatch );
definitionExists = true;
}
}
if( !definitionExists ) {
for( int index: match.indices.keySet() )
argumentMatches.put( index, match );
for( ArgumentMatchSite site: match.sites.keySet() )
argumentMatches.put( site, match );
}
}

View File

@ -336,6 +336,28 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return parse(parsingEngine, source, type, matches, false);
}
/**
* The actual argument parsing method.
*
* IMPORTANT NOTE: the createIntervalBinding argument is a bit of a hack, but after discussions with SE we've decided
* that it's the best way to proceed for now. IntervalBindings can either be proper RodBindings (hence the use of
* this parse() method) or can be Strings (representing raw intervals or the files containing them). If createIntervalBinding
* is true, we do not call parsingEngine.addRodBinding() because we don't want walkers to assume that these are the
* usual set of RodBindings. It also allows us in the future to be smart about tagging rods as intervals. One other
* side point is that we want to continue to allow the usage of non-Feature intervals so that users can theoretically
* continue to input them out of order (whereas Tribble Features are ordered).
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @param createIntervalBinding should we attempt to create an IntervalBinding instead of a RodBinding?
* @return the RodBinding/IntervalBinding object depending on the value of createIntervalBinding.
*/
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches, boolean createIntervalBinding) {
ArgumentDefinition defaultDefinition = createDefaultArgumentDefinition(source);
String value = getArgumentValue( defaultDefinition, matches );
Class<? extends Feature> parameterType = JVMUtils.getParameterizedTypeClass(type);
@ -348,7 +370,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
if ( tags.getPositionalTags().size() > 2 ) {
throw new UserException.CommandLineException(
String.format("Unexpected number of positional tags for argument %s : %s. " +
"Rod bindings only suport -X:type and -X:name,type argument styles",
"Rod bindings only support -X:type and -X:name,type argument styles",
value, source.field.getName()));
} if ( tags.getPositionalTags().size() == 2 ) {
// -X:name,type style
@ -378,7 +400,12 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
}
if ( tribbleType == null )
if ( tribbleType == null ) {
// IntervalBindings allow streaming conversion of Strings
if ( createIntervalBinding ) {
return new IntervalBinding(value);
}
if ( ! file.exists() ) {
throw new UserException.CouldNotReadInputFile(file, "file does not exist");
} else if ( ! file.canRead() || ! file.isFile() ) {
@ -389,13 +416,20 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
"Please add an explicit type tag :NAME listing the correct type from among the supported types:%n%s",
manager.userFriendlyListOfAvailableFeatures(parameterType)));
}
}
}
}
Constructor ctor = (makeRawTypeIfNecessary(type)).getConstructor(Class.class, String.class, String.class, String.class, Tags.class);
RodBinding result = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(result,tags);
parsingEngine.addRodBinding(result);
Object result;
if ( createIntervalBinding ) {
result = ctor.newInstance(parameterType, name, value, tribbleType, tags);
} else {
RodBinding rbind = (RodBinding)ctor.newInstance(parameterType, name, value, tribbleType, tags);
parsingEngine.addTags(rbind, tags);
parsingEngine.addRodBinding(rbind);
result = rbind;
}
return result;
} catch (InvocationTargetException e) {
throw new UserException.CommandLineException(
@ -409,6 +443,39 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
}
}
/**
* Parser for RodBinding objects
*/
class IntervalBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
/**
* We only want IntervalBinding class objects
* @param type The type to check.
* @return true if the provided class is an IntervalBinding.class
*/
@Override
public boolean supports( Class type ) {
return isIntervalBinding(type);
}
public static boolean isIntervalBinding( Class type ) {
return IntervalBinding.class.isAssignableFrom(type);
}
/**
* See note from RodBindingArgumentTypeDescriptor.parse().
*
* @param parsingEngine parsing engine
* @param source source
* @param type type to check
* @param matches matches
* @return the IntervalBinding object.
*/
@Override
public Object parse(ParsingEngine parsingEngine, ArgumentSource source, Type type, ArgumentMatches matches) {
return new RodBindingArgumentTypeDescriptor().parse(parsingEngine, source, type, matches, true);
}
}
/**
* Parse simple argument types: java primitives, wrapper classes, and anything that has
* a simple String constructor.
@ -416,7 +483,7 @@ class RodBindingArgumentTypeDescriptor extends ArgumentTypeDescriptor {
class SimpleArgumentTypeDescriptor extends ArgumentTypeDescriptor {
@Override
public boolean supports( Class type ) {
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) ) return false;
if ( RodBindingArgumentTypeDescriptor.isRodBinding(type) || IntervalBindingArgumentTypeDescriptor.isIntervalBinding(type) ) return false;
if ( type.isPrimitive() ) return true;
if ( type.isEnum() ) return true;
if ( primitiveToWrapperMap.containsValue(type) ) return true;

View File

@ -35,10 +35,7 @@ import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.Locale;
import java.util.*;
public abstract class CommandLineProgram {
@ -155,6 +152,7 @@ public abstract class CommandLineProgram {
*
* @param clp the command line program to execute
* @param args the command line arguments passed in
* @param dryRun dry run
* @throws Exception when an exception occurs
*/
@SuppressWarnings("unchecked")
@ -176,6 +174,8 @@ public abstract class CommandLineProgram {
ParsingEngine parser = clp.parser = new ParsingEngine(clp);
parser.addArgumentSource(clp.getClass());
Map<ArgumentMatchSource, List<String>> parsedArgs;
// process the args
if (clp.canAddArgumentsDynamically()) {
// if the command-line program can toss in extra args, fetch them and reparse the arguments.
@ -196,14 +196,14 @@ public abstract class CommandLineProgram {
Class[] argumentSources = clp.getArgumentSources();
for (Class argumentSource : argumentSources)
parser.addArgumentSource(clp.getArgumentSourceName(argumentSource), argumentSource);
parser.parse(args);
parsedArgs = parser.parse(args);
if (isHelpPresent(parser))
printHelpAndExit(clp, parser);
if ( ! dryRun ) parser.validate();
} else {
parser.parse(args);
parsedArgs = parser.parse(args);
if ( ! dryRun ) {
if (isHelpPresent(parser))
@ -230,7 +230,7 @@ public abstract class CommandLineProgram {
}
// regardless of what happens next, generate the header information
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), args);
HelpFormatter.generateHeaderInformation(clp.getApplicationDetails(), parsedArgs);
// call the execute
CommandLineProgram.result = clp.execute();

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.readers.AsciiLineReader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.*;
/**
 * An IntervalBinding representing a walker argument that gets bound to either a ROD track or interval string.
 *
 * The IntervalBinding<T> is a formal GATK argument that bridges between a walker and
 * the engine to construct intervals for traversal at runtime.  The IntervalBinding can
 * either be a RodBinding<T>, a string of one or more intervals, or a file with interval strings.
 * The GATK Engine takes care of initializing the binding when appropriate and determining intervals from it.
 *
 * Note that this class is immutable.
 */
public final class IntervalBinding<T extends Feature> {

    // Exactly one of these two fields is non-null, depending on which constructor was invoked.
    private RodBinding<T> featureIntervals;
    private String stringIntervals;

    @Requires({"type != null", "rawName != null", "source != null", "tribbleType != null", "tags != null"})
    public IntervalBinding(Class<T> type, final String rawName, final String source, final String tribbleType, final Tags tags) {
        featureIntervals = new RodBinding<T>(type, rawName, source, tribbleType, tags);
    }

    @Requires({"intervalArgument != null"})
    public IntervalBinding(String intervalArgument) {
        stringIntervals = intervalArgument;
    }

    /**
     * @return the source of this binding: the feature file path when bound to a ROD track,
     *         otherwise the raw interval string supplied on the command line.
     */
    public String getSource() {
        if ( featureIntervals != null )
            return featureIntervals.getSource();
        return stringIntervals;
    }

    /**
     * Materializes this binding into concrete genome locations.
     *
     * When bound to a feature track, the track's file is decoded record-by-record and each
     * record's location is converted via the engine's GenomeLocParser.  When bound to an
     * interval string, the string is parsed directly.
     *
     * @param toolkit the engine providing the GenomeLocParser used to build locations
     * @return the list of intervals described by this binding
     * @throws UserException if the backing interval file cannot be read
     */
    public List<GenomeLoc> getIntervals(GenomeAnalysisEngine toolkit) {
        if ( featureIntervals == null ) {
            return IntervalUtils.parseIntervalArguments(toolkit.getGenomeLocParser(), stringIntervals);
        }

        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();

        // TODO -- after ROD system cleanup, go through the ROD system so that we can handle things like gzipped files
        FeatureCodec codec = new FeatureManager().getByName(featureIntervals.getTribbleType()).getCodec();
        if ( codec instanceof ReferenceDependentFeatureCodec )
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(toolkit.getGenomeLocParser());

        FileInputStream fis = null;
        try {
            fis = new FileInputStream(new File(featureIntervals.getSource()));
            AsciiLineReader lineReader = new AsciiLineReader(fis);
            codec.readHeader(lineReader);
            String line = lineReader.readLine();
            while ( line != null ) {
                intervals.add(toolkit.getGenomeLocParser().createGenomeLoc(codec.decodeLoc(line)));
                line = lineReader.readLine();
            }
        } catch (IOException e) {
            throw new UserException("Problem reading the interval file " + featureIntervals.getSource() + "; " + e.getMessage());
        } finally {
            // Fix: the original leaked the file handle on every path; always release it.
            if ( fis != null ) {
                try {
                    fis.close();
                } catch (IOException e) {
                    // non-fatal: the intervals have already been read at this point
                }
            }
        }

        return intervals;
    }
}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.commandline;
import com.google.java.contract.Requires;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.classloader.JVMUtils;
@ -35,6 +36,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.help.ApplicationDetails;
import org.broadinstitute.sting.utils.help.HelpFormatter;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.*;
@ -75,6 +78,7 @@ public class ParsingEngine {
* The type of set used must be ordered (but not necessarily sorted).
*/
private static final Set<ArgumentTypeDescriptor> STANDARD_ARGUMENT_TYPE_DESCRIPTORS = new LinkedHashSet<ArgumentTypeDescriptor>( Arrays.asList(new SimpleArgumentTypeDescriptor(),
new IntervalBindingArgumentTypeDescriptor(),
new RodBindingArgumentTypeDescriptor(),
new CompoundArgumentTypeDescriptor(),
new MultiplexArgumentTypeDescriptor()) );
@ -100,6 +104,8 @@ public class ParsingEngine {
if(clp != null)
argumentTypeDescriptors.addAll(clp.getArgumentTypeDescriptors());
argumentTypeDescriptors.addAll(STANDARD_ARGUMENT_TYPE_DESCRIPTORS);
addArgumentSource(ParsingEngineArgumentFiles.class);
}
/**
@ -148,21 +154,43 @@ public class ParsingEngine {
* command-line arguments to the arguments that are actually
* required.
* @param tokens Tokens passed on the command line.
* @return The parsed arguments by file.
*/
public void parse( String[] tokens ) {
public SortedMap<ArgumentMatchSource, List<String>> parse( String[] tokens ) {
argumentMatches = new ArgumentMatches();
SortedMap<ArgumentMatchSource, List<String>> parsedArgs = new TreeMap<ArgumentMatchSource, List<String>>();
int lastArgumentMatchSite = -1;
List<String> cmdLineTokens = Arrays.asList(tokens);
parse(ArgumentMatchSource.COMMAND_LINE, cmdLineTokens, argumentMatches, parsedArgs);
for( int i = 0; i < tokens.length; i++ ) {
String token = tokens[i];
ParsingEngineArgumentFiles argumentFiles = new ParsingEngineArgumentFiles();
// Load the arguments ONLY into the argument files.
// Validation may optionally run on the rest of the arguments.
loadArgumentsIntoObject(argumentFiles);
for (File file: argumentFiles.files) {
List<String> fileTokens = getArguments(file);
parse(new ArgumentMatchSource(file), fileTokens, argumentMatches, parsedArgs);
}
return parsedArgs;
}
private void parse(ArgumentMatchSource matchSource, List<String> tokens,
ArgumentMatches argumentMatches, SortedMap<ArgumentMatchSource, List<String>> parsedArgs) {
ArgumentMatchSite lastArgumentMatchSite = new ArgumentMatchSite(matchSource, -1);
int i = 0;
for (String token: tokens) {
// If the token is of argument form, parse it into its own argument match.
// Otherwise, pair it with the most recently used argument discovered.
ArgumentMatchSite site = new ArgumentMatchSite(matchSource, i);
if( isArgumentForm(token) ) {
ArgumentMatch argumentMatch = parseArgument( token, i );
ArgumentMatch argumentMatch = parseArgument( token, site );
if( argumentMatch != null ) {
argumentMatches.mergeInto( argumentMatch );
lastArgumentMatchSite = i;
lastArgumentMatchSite = site;
}
}
else {
@ -170,10 +198,31 @@ public class ParsingEngine {
!argumentMatches.getMatch(lastArgumentMatchSite).hasValueAtSite(lastArgumentMatchSite))
argumentMatches.getMatch(lastArgumentMatchSite).addValue( lastArgumentMatchSite, token );
else
argumentMatches.MissingArgument.addValue( i, token );
argumentMatches.MissingArgument.addValue( site, token );
}
i++;
}
parsedArgs.put(matchSource, tokens);
}
/**
 * Reads the argument tokens stored in an argument file.
 * Only files whose path ends in ".list" are accepted; anything else is rejected.
 *
 * @param file an argument file named via the arg_file option
 * @return the tokens read from the file
 * @throws UserException.CouldNotReadInputFile if the extension is wrong or reading fails
 */
private List<String> getArguments(File file) {
    if (!file.getAbsolutePath().endsWith(".list"))
        throw new UserException.CouldNotReadInputFile(file, "file extension is not .list");
    try {
        return getListArguments(file);
    } catch (IOException e) {
        throw new UserException.CouldNotReadInputFile(file, e);
    }
}
/**
 * Tokenizes each line of a .list argument file.
 * Lines are split with Utils.escapeExpressions so quoted expressions survive intact.
 *
 * @param file the .list file to read
 * @return every token from every line, in file order
 * @throws IOException if the file cannot be read
 */
private List<String> getListArguments(File file) throws IOException {
    final List<String> arguments = new ArrayList<String>();
    for (final String line : FileUtils.readLines(file)) {
        Collections.addAll(arguments, Utils.escapeExpressions(line));
    }
    return arguments;
}
public enum ValidationType { MissingRequiredArgument,
@ -494,7 +543,7 @@ public class ParsingEngine {
* @param position The position of the token in question.
* @return ArgumentMatch associated with this token, or null if no match exists.
*/
private ArgumentMatch parseArgument( String token, int position ) {
private ArgumentMatch parseArgument( String token, ArgumentMatchSite position ) {
if( !isArgumentForm(token) )
throw new IllegalArgumentException( "Token is not recognizable as an argument: " + token );
@ -579,9 +628,21 @@ class UnmatchedArgumentException extends ArgumentException {
private static String formatArguments( ArgumentMatch invalidValues ) {
StringBuilder sb = new StringBuilder();
for( int index: invalidValues.indices.keySet() )
for( String value: invalidValues.indices.get(index) ) {
sb.append( String.format("%nInvalid argument value '%s' at position %d.", value, index) );
for( ArgumentMatchSite site: invalidValues.sites.keySet() )
for( String value: invalidValues.sites.get(site) ) {
switch (site.getSource().getType()) {
case CommandLine:
sb.append( String.format("%nInvalid argument value '%s' at position %d.",
value, site.getIndex()) );
break;
case File:
sb.append( String.format("%nInvalid argument value '%s' in file %s at position %d.",
value, site.getSource().getFile().getAbsolutePath(), site.getIndex()) );
break;
default:
throw new RuntimeException( String.format("Unexpected argument match source type: %s",
site.getSource().getType()));
}
if(value != null && Utils.dupString(' ',value.length()).equals(value))
sb.append(" Please make sure any line continuation backslashes on your command line are not followed by whitespace.");
}
@ -634,4 +695,13 @@ class UnknownEnumeratedValueException extends ArgumentException {
private static String formatArguments(ArgumentDefinition definition, String argumentPassed) {
return String.format("Invalid value %s specified for argument %s; valid options are (%s).", argumentPassed, definition.fullName, Utils.join(",",definition.validOptions));
}
}
}
/**
 * Container class to store the list of argument files.
 * The files will be parsed after the command line arguments.
 */
class ParsingEngineArgumentFiles {
    // Zero or more files named via --arg_file/-args; the parsing engine reads each one
    // for additional argument tokens after the command line itself has been parsed.
    @Argument(fullName = "arg_file", shortName = "args", doc = "Reads arguments from the specified file", required = false)
    public List<File> files = new ArrayList<File>();
}

View File

@ -68,7 +68,7 @@ public abstract class ParsingMethod {
* @return An argument match. Definition field will be populated if a match was found or
* empty if no appropriate definition could be found.
*/
public ArgumentMatch match( ArgumentDefinitions definitions, String token, int position ) {
public ArgumentMatch match( ArgumentDefinitions definitions, String token, ArgumentMatchSite position ) {
// If the argument is valid, parse out the argument.
Matcher matcher = pattern.matcher(token);
@ -102,9 +102,7 @@ public abstract class ParsingMethod {
// Try to find a matching argument. If found, label that as the match. If not found, add the argument
// with a null definition.
ArgumentMatch argumentMatch = new ArgumentMatch(argument,argumentDefinition,position,tags);
return argumentMatch;
return new ArgumentMatch(argument,argumentDefinition,position,tags);
}
/**

View File

@ -28,34 +28,30 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.samtools.*;
import org.apache.log4j.Logger;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.*;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.util.*;
@ -92,7 +88,7 @@ public class GenomeAnalysisEngine {
/**
* Accessor for sample metadata
*/
private SampleDataSource sampleDataSource = null;
private SampleDB sampleDB = null;
/**
* Accessor for sharded reference-ordered data.
@ -206,6 +202,9 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal.
initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
@ -222,12 +221,12 @@ public class GenomeAnalysisEngine {
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
// execute the microscheduler, storing the results
Object result = microScheduler.execute(this.walker, shardStrategy);
return microScheduler.execute(this.walker, shardStrategy);
//monitor.stop();
//logger.info(String.format("Maximum heap size consumed: %d",monitor.getMaxMemoryUsed()));
return result;
//return result;
}
/**
@ -259,13 +258,12 @@ public class GenomeAnalysisEngine {
* @return A collection of available filters.
*/
public Collection<ReadFilter> createFilters() {
Set<ReadFilter> filters = new HashSet<ReadFilter>();
filters.addAll(WalkerManager.getReadFilters(walker,this.getFilterManager()));
final List<ReadFilter> filters = WalkerManager.getReadFilters(walker,this.getFilterManager());
if (this.getArguments().readGroupBlackList != null && this.getArguments().readGroupBlackList.size() > 0)
filters.add(new ReadGroupBlackListFilter(this.getArguments().readGroupBlackList));
for(String filterName: this.getArguments().readFilters)
for(final String filterName: this.getArguments().readFilters)
filters.add(this.getFilterManager().createByName(filterName));
return Collections.unmodifiableSet(filters);
return Collections.unmodifiableList(filters);
}
/**
@ -299,10 +297,14 @@ public class GenomeAnalysisEngine {
else if(WalkerManager.getDownsamplingMethod(walker) != null)
method = WalkerManager.getDownsamplingMethod(walker);
else
method = argCollection.getDefaultDownsamplingMethod();
method = GATKArgumentCollection.getDefaultDownsamplingMethod();
return method;
}
protected void setDownsamplingMethod(DownsamplingMethod method) {
argCollection.setDownsamplingMethod(method);
}
public BAQ.QualityMode getWalkerBAQQualityMode() { return WalkerManager.getBAQQualityMode(walker); }
public BAQ.ApplicationTime getWalkerBAQApplicationTime() { return WalkerManager.getBAQApplicationTime(walker); }
@ -381,18 +383,18 @@ public class GenomeAnalysisEngine {
// If intervals is non-null and empty at this point, it means that the list of intervals to process
// was filtered down to an empty set (eg., the user specified something like -L chr1 -XL chr1). Since
// this was very likely unintentional, the user should be informed of this. Note that this is different
// from the case where intervals == null, which indicates either that there were no interval arguments,
// or that -L all was specified.
// from the case where intervals == null, which indicates that there were no interval arguments.
if ( intervals != null && intervals.isEmpty() ) {
throw new ArgumentException("The given combination of -L and -XL options results in an empty set. " +
"No intervals to process.");
logger.warn("The given combination of -L and -XL options results in an empty set. No intervals to process.");
}
}
/**
* Get the sharding strategy given a driving data source.
*
* @param readsDataSource readsDataSource
* @param drivingDataSource Data on which to shard.
* @param intervals intervals
* @return the sharding strategy
*/
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
@ -429,7 +431,7 @@ public class GenomeAnalysisEngine {
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
}
ShardStrategy shardStrategy = null;
ShardStrategy shardStrategy;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
long SHARD_SIZE = 100000L;
@ -438,6 +440,8 @@ public class GenomeAnalysisEngine {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) {
if (readsDataSource == null)
throw new IllegalArgumentException("readsDataSource is null");
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
@ -501,7 +505,8 @@ public class GenomeAnalysisEngine {
*/
private void initializeTempDirectory() {
File tempDir = new File(System.getProperty("java.io.tmpdir"));
tempDir.mkdirs();
if (!tempDir.exists() && !tempDir.mkdirs())
throw new UserException.BadTmpDir("Unable to create directory");
}
/**
@ -566,34 +571,23 @@ public class GenomeAnalysisEngine {
protected void initializeIntervals() {
// return if no interval arguments at all
if ((argCollection.intervals == null) && (argCollection.excludeIntervals == null) && (argCollection.RODToInterval == null))
if ( argCollection.intervals == null && argCollection.excludeIntervals == null )
return;
// if '-L all' was specified, verify that it was the only -L specified and return if so.
if(argCollection.intervals != null) {
for(String interval: argCollection.intervals) {
if(interval.trim().equals("all")) {
if(argCollection.intervals.size() > 1)
throw new UserException("'-L all' was specified along with other intervals or interval lists; the GATK cannot combine '-L all' with other intervals.");
// '-L all' was specified and seems valid. Return.
return;
}
}
}
// Note that the use of '-L all' is no longer supported.
// if include argument isn't given, create new set of all possible intervals
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null && argCollection.RODToInterval == null ?
GenomeLocSortedSet includeSortedSet = (argCollection.intervals == null ?
GenomeLocSortedSet.createSetFromSequenceDictionary(this.referenceDataSource.getReference().getSequenceDictionary()) :
loadIntervals(argCollection.intervals, IntervalUtils.mergeIntervalLocations(getRODIntervals(), argCollection.intervalMerging)));
loadIntervals(argCollection.intervals, argCollection.intervalSetRule));
// if no exclude arguments, can return parseIntervalArguments directly
if (argCollection.excludeIntervals == null)
if ( argCollection.excludeIntervals == null )
intervals = includeSortedSet;
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
// otherwise there are exclude arguments => must merge include and exclude GenomeLocSortedSets
else {
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, null);
GenomeLocSortedSet excludeSortedSet = loadIntervals(argCollection.excludeIntervals, IntervalSetRule.UNION);
intervals = includeSortedSet.subtractRegions(excludeSortedSet);
// logging messages only printed when exclude (-XL) arguments are given
@ -608,47 +602,26 @@ public class GenomeAnalysisEngine {
/**
* Loads the intervals relevant to the current execution
* @param argList String representation of arguments; might include 'all', filenames, intervals in samtools
* notation, or a combination of the above
* @param rodIntervals a list of ROD intervals to add to the returned set. Can be empty or null.
* @param argList argument bindings; might include filenames, intervals in samtools notation, or a combination of the above
* @param rule interval merging rule
* @return A sorted, merged list of all intervals specified in this arg list.
*/
protected GenomeLocSortedSet loadIntervals( List<String> argList, List<GenomeLoc> rodIntervals ) {
protected GenomeLocSortedSet loadIntervals( List<IntervalBinding<Feature>> argList, IntervalSetRule rule ) {
boolean allowEmptyIntervalList = (argCollection.unsafe == ValidationExclusion.TYPE.ALLOW_EMPTY_INTERVAL_LIST ||
argCollection.unsafe == ValidationExclusion.TYPE.ALL);
List<GenomeLoc> allIntervals = new ArrayList<GenomeLoc>(0);
for ( IntervalBinding intervalBinding : argList ) {
List<GenomeLoc> intervals = intervalBinding.getIntervals(this);
List<GenomeLoc> nonRODIntervals = IntervalUtils.parseIntervalArguments(genomeLocParser, argList, allowEmptyIntervalList);
List<GenomeLoc> allIntervals = IntervalUtils.mergeListsBySetOperator(rodIntervals, nonRODIntervals, argCollection.BTIMergeRule);
if ( intervals.isEmpty() ) {
logger.warn("The interval file " + intervalBinding.getSource() + " contains no intervals that could be parsed.");
}
allIntervals = IntervalUtils.mergeListsBySetOperator(intervals, allIntervals, rule);
}
return IntervalUtils.sortAndMergeIntervals(genomeLocParser, allIntervals, argCollection.intervalMerging);
}
/**
* if we have a ROD specified as a 'rodToIntervalTrackName', convert its records to RODs
* @return ROD intervals as GenomeLocs
*/
private List<GenomeLoc> getRODIntervals() {
Map<String, ReferenceOrderedDataSource> rodNames = RMDIntervalGenerator.getRMDTrackNames(rodDataSources);
// Do we have any RODs that overloaded as interval lists with the 'rodToIntervalTrackName' flag?
List<GenomeLoc> ret = new ArrayList<GenomeLoc>();
if (rodNames != null && argCollection.RODToInterval != null) {
String rodName = argCollection.RODToInterval;
// check to make sure we have a rod of that name
if (!rodNames.containsKey(rodName))
throw new UserException.CommandLineException("--rodToIntervalTrackName (-BTI) was passed the name '"+rodName+"', which wasn't given as a ROD name in the -B option");
for (String str : rodNames.keySet())
if (str.equals(rodName)) {
logger.info("Adding interval list from track (ROD) named " + rodName);
RMDIntervalGenerator intervalGenerator = new RMDIntervalGenerator(rodNames.get(str));
ret.addAll(intervalGenerator.toGenomeLocList());
}
}
return ret;
}
/**
* Add additional, externally managed IO streams for inputs.
*
@ -692,12 +665,22 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters)
filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
}
/**
 * Entry-point function to initialize the samples database from input data and pedigree arguments
 */
private void initializeSampleDB() {
    SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
    // Accumulate samples from every available source: the merged SAM header, the sample
    // names referenced by the ROD tracks, pedigree files, and inline pedigree strings.
    sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
    sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
    sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
    sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
    // Finalize (and, per the configured validation type, validate) the assembled database.
    sampleDB = sampleDBBuilder.getFinalSampleDB();
}
/**
* Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine.
@ -716,106 +699,13 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id);
}
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> sample_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> samples = new HashSet<String>(1);
sample_sets.add(samples);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
samples.add(g.getSample());
}
}
return sample_sets;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> lib_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> libs = new HashSet<String>(2);
lib_sets.add(libs);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
libs.add(g.getLibrary());
}
}
return lib_sets;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());
for (SAMReaderID r : readers) {
Set<String> groups = new HashSet<String>(5);
rg_sets.add(groups);
for (SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
if (getReadsDataSource().hasReadGroupCollisions()) { // Check if there were read group clashes with hasGroupIdDuplicates and if so:
// use HeaderMerger to translate original read group id from the reader into the read group id in the
// merged stream, and save that remapped read group id to associate it with specific reader
groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
} else {
// otherwise, pass through the unmapped read groups since this is what Picard does as well
groups.add(g.getReadGroupId());
}
}
}
return rg_sets;
}
/**
* Now that all files are open, validate the sequence dictionaries of the reads vs. the reference vrs the reference ordered data (if available).
*
* @param reads Reads data source.
* @param reference Reference data source.
* @param rods a collection of the reference ordered data tracks
* @param manager manager
*/
private void validateSourcesAgainstReference(SAMDataSource reads, ReferenceSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, RMDTrackBuilder manager) {
if ((reads.isEmpty() && (rods == null || rods.isEmpty())) || reference == null )
@ -844,15 +734,22 @@ public class GenomeAnalysisEngine {
/**
* Gets a data source for the given set of reads.
*
* @param argCollection arguments
* @param genomeLocParser parser
* @param refReader reader
* @return A data source for the given set of reads.
*/
private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
DownsamplingMethod method = getDownsamplingMethod();
// Synchronize the method back into the collection so that it shows up when
// interrogating for the downsample method during command line recreation.
setDownsamplingMethod(method);
if ( getWalkerBAQApplicationTime() == BAQ.ApplicationTime.FORBIDDEN && argCollection.BAQMode != BAQ.CalculationMode.OFF)
throw new UserException.BadArgumentValue("baq", "Walker cannot accept BAQ'd base qualities, and yet BAQ mode " + argCollection.BAQMode + " was requested.");
SAMDataSource dataSource = new SAMDataSource(
return new SAMDataSource(
samReaderIDs,
genomeLocParser,
argCollection.useOriginalBaseQualities,
@ -868,14 +765,12 @@ public class GenomeAnalysisEngine {
refReader,
argCollection.defaultBaseQualities,
!argCollection.disableLowMemorySharding);
return dataSource;
}
/**
* Opens a reference sequence file paired with an index. Only public for testing purposes
*
* @param refFile Handle to a reference sequence file. Non-null.
* @return A thread-safe file wrapper.
*/
public void setReferenceDataSource(File refFile) {
this.referenceDataSource = new ReferenceDataSource(refFile);
@ -929,6 +824,26 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
 * Returns an ordered list of the unmerged SAM file headers known to this engine.
 * @return list of header for each input SAM file, in command line order
 */
public List<SAMFileHeader> getSAMFileHeaders() {
    final SAMDataSource readsSource = getReadsDataSource();
    final List<SAMFileHeader> unmergedHeaders = new ArrayList<SAMFileHeader>();
    for ( final SAMReaderID readerID : readsSource.getReaderIDs() ) {
        unmergedHeaders.add(readsSource.getHeader(readerID));
    }
    return unmergedHeaders;
}
/**
 * Gets the master sequence dictionary for this GATK engine instance
 * @return a never-null dictionary listing all of the contigs known to this engine instance
 */
public SAMSequenceDictionary getMasterSequenceDictionary() {
    // Delegates to the reference data source: the reference's sequence dictionary is the
    // authoritative contig list for this engine instance.
    return getReferenceDataSource().getReference().getSequenceDictionary();
}
/**
* Returns data source object encapsulating all essential info and handlers used to traverse
* reads; header merger, individual file readers etc can be accessed through the returned data source object.
@ -939,8 +854,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource;
}
/**
* Sets the collection of GATK main application arguments.
*
@ -1027,140 +940,14 @@ public class GenomeAnalysisEngine {
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
}
public SampleDataSource getSampleMetadata() {
return this.sampleDataSource;
}
// -------------------------------------------------------------------------------------
//
// code for working with Samples database
//
// -------------------------------------------------------------------------------------
/**
* Get a sample by its ID
* If an alias is passed in, return the main sample object
* @param id sample id
* @return sample Object with this ID
*/
public Sample getSampleById(String id) {
return sampleDataSource.getSampleById(id);
}
/**
* Get the sample for a given read group
* Must first look up ID for read group
* @param readGroup of sample
* @return sample object with ID from the read group
*/
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
return sampleDataSource.getSampleByReadGroup(readGroup);
}
/**
* Get a sample for a given read
* Must first look up read group, and then sample ID for that read group
* @param read of sample
* @return sample object of this read
*/
public Sample getSampleByRead(SAMRecord read) {
return getSampleByReadGroup(read.getReadGroup());
}
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
return sampleDataSource.sampleCount();
}
/**
* Return all samples with a given family ID
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
* @param familyId family ID
* @return Samples with the given family ID
*/
public Set<Sample> getFamily(String familyId) {
return sampleDataSource.getFamily(familyId);
}
/**
* Returns all children of a given sample
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
* @param sample parent sample
* @return children of the given sample
*/
public Set<Sample> getChildren(Sample sample) {
return sampleDataSource.getChildren(sample);
}
/**
* Gets all the samples
* @return
*/
public Collection<Sample> getSamples() {
return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*
* @param sampleNameList List of sample names
* @return Corresponding set of samples
*/
public Set<Sample> getSamples(Collection<String> sampleNameList) {
return sampleDataSource.getSamples(sampleNameList);
}
/**
* Returns a set of samples that have any value (which could be null) for a given property
* @param key Property key
* @return Set of samples with the property
*/
public Set<Sample> getSamplesWithProperty(String key) {
return sampleDataSource.getSamplesWithProperty(key);
}
/**
* Returns a set of samples that have a property with a certain value
* Value must be a string for now - could add a similar method for matching any objects in the future
*
* @param key Property key
* @param value String property value
* @return Set of samples that match key and value
*/
public Set<Sample> getSamplesWithProperty(String key, String value) {
return sampleDataSource.getSamplesWithProperty(key, value);
}
/**
* Returns a set of sample objects for the sample names in a variant context
*
* @param context Any variant context
* @return a set of the sample objects
*/
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
Set<Sample> samples = new HashSet<Sample>();
for (String sampleName : context.getSampleNames()) {
samples.add(sampleDataSource.getOrCreateSample(sampleName));
}
return samples;
}
/**
* Returns all samples that were referenced in the SAM file
*/
public Set<Sample> getSAMFileSamples() {
return sampleDataSource.getSAMFileSamples();
}
/**
* Return a subcontext restricted to samples with a given property key/value
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
* @param context VariantContext to filter
* @param key property key
* @param value property value (must be string)
* @return subcontext
*/
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
return sampleDataSource.subContextFromSampleProperty(context, key, value);
public SampleDB getSampleDB() {
return this.sampleDB;
}
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -30,6 +30,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
* Holds a bunch of basic information about the traversal.
@ -102,8 +103,12 @@ public class ReadMetrics implements Cloneable {
counter.put(filter.getClass(), c + 1L);
}
public Map<Class,Long> getCountsByFilter() {
return Collections.unmodifiableMap(counter);
public Map<String,Long> getCountsByFilter() {
final TreeMap<String, Long> sortedCounts = new TreeMap<String, Long>();
for(Map.Entry<Class,Long> counterEntry: counter.entrySet()) {
sortedCounts.put(counterEntry.getKey().getSimpleName(),counterEntry.getValue());
}
return sortedCounts;
}
/**

View File

@ -29,13 +29,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.*;
/**
* @author ebanks
* @version 1.0
*/
@Root
public class DbsnpArgumentCollection {
/**

View File

@ -26,34 +26,26 @@
package org.broadinstitute.sting.gatk.arguments;
import net.sf.samtools.SAMFileReader;
import org.broad.tribble.Feature;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.IntervalBinding;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalSetRule;
import org.simpleframework.xml.*;
import org.simpleframework.xml.core.Persister;
import org.simpleframework.xml.stream.Format;
import org.simpleframework.xml.stream.HyphenStyle;
import java.io.File;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* @author aaron
* @version 1.0
*/
@Root
public class GATKArgumentCollection {
/* our version number */
@ -64,58 +56,58 @@ public class GATKArgumentCollection {
public GATKArgumentCollection() {
}
@ElementMap(entry = "analysis_argument", key = "key", attribute = true, inline = true, required = false)
public Map<String, String> walkerArgs = new HashMap<String, String>();
// parameters and their defaults
@ElementList(required = false)
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>();
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
public List<File> sampleFiles = new ArrayList<File>();
@Element(required = false)
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@Element(required = false)
@Argument(fullName = "phone_home", shortName = "et", doc="What kind of GATK run report should we generate? Standard is the default, can be verbose or NO_ET so nothing is posted to the run repository", required = false)
public GATKRunReport.PhoneHomeOption phoneHomeType = GATKRunReport.PhoneHomeOption.STANDARD;
@ElementList(required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually.", required = false)
@Argument(fullName = "read_filter", shortName = "rf", doc = "Specify filtration criteria to apply to each read individually", required = false)
public List<String> readFilters = new ArrayList<String>();
@ElementList(required = false)
@Input(fullName = "intervals", shortName = "L", doc = "A list of genomic intervals over which to operate. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> intervals = null;
/**
* Using this option one can instruct the GATK engine to traverse over only part of the genome. This argument can be specified multiple times.
* One may use samtools-style intervals either explicitly (e.g. -L chr1 or -L chr1:100-200) or listed in a file (e.g. -L myFile.intervals).
* Additionally, one may specify a rod file to traverse over the positions for which there is a record in the file (e.g. -L file.vcf).
*/
@Input(fullName = "intervals", shortName = "L", doc = "One or more genomic intervals over which to operate. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> intervals = null;
@ElementList(required = false)
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "A list of genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file.", required = false)
public List<String> excludeIntervals = null;
/**
* Using this option one can instruct the GATK engine NOT to traverse over certain parts of the genome. This argument can be specified multiple times.
* One may use samtools-style intervals either explicitly (e.g. -XL chr1 or -XL chr1:100-200) or listed in a file (e.g. -XL myFile.intervals).
* Additionally, one may specify a rod file to skip over the positions for which there is a record in the file (e.g. -XL file.vcf).
*/
@Input(fullName = "excludeIntervals", shortName = "XL", doc = "One or more genomic intervals to exclude from processing. Can be explicitly specified on the command line or in a file (including a rod file)", required = false)
public List<IntervalBinding<Feature>> excludeIntervals = null;
/**
* How should the intervals specified by multiple -L or -XL arguments be combined? Using this argument one can, for example, traverse over all of the positions
* for which there is a record in a VCF but just in chromosome 20 (-L chr20 -L file.vcf -isr INTERSECTION).
*/
@Argument(fullName = "interval_set_rule", shortName = "isr", doc = "Indicates the set merging approach the interval parser should use to combine the various -L or -XL inputs", required = false)
public IntervalSetRule intervalSetRule = IntervalSetRule.UNION;
/**
* Should abutting (but not overlapping) intervals be treated as separate intervals?
*/
@Argument(fullName = "interval_merging", shortName = "im", doc = "Indicates the interval merging rule we should use for abutting intervals", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@Element(required = false)
@Input(fullName = "reference_sequence", shortName = "R", doc = "Reference sequence file", required = false)
public File referenceFile = null;
@Deprecated
@Hidden
@ElementList(required = false)
@Input(fullName = "rodBind", shortName = "B", doc = "Bindings for reference-ordered data, in the form :<name>,<type> <file>", required = false)
public ArrayList<String> RODBindings = new ArrayList<String>();
@Element(required = false)
@Argument(fullName = "rodToIntervalTrackName", shortName = "BTI", doc = "Indicates that the named track should be converted into an interval list, to drive the traversal", required = false)
public String RODToInterval = null;
@Element(required = false)
@Argument(fullName = "BTI_merge_rule", shortName = "BTIMR", doc = "Indicates the merging approach the interval parser should use to combine the BTI track with other -L options", required = false)
public IntervalSetRule BTIMergeRule = IntervalSetRule.UNION;
@Element(required = false)
@Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
public boolean nonDeterministicRandomSeed = false;
@ -128,22 +120,19 @@ public class GATKArgumentCollection {
private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
@Element(required = false)
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here.", required = false)
@Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus. Reads will be selected randomly to be removed from the pile based on the method described here", required = false)
public DownsampleType downsamplingType = null;
@Element(required = false)
@Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
public Double downsampleFraction = null;
@Element(required = false)
@Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
public Integer downsampleCoverage = null;
/**
* Gets the downsampling method explicitly specified by the user. If the user didn't specify
* a default downsampling mechanism, return null.
* @return The explicitly specified downsampling mechanism, or null if none exists.
* a default downsampling mechanism, return the default.
* @return The explicitly specified downsampling mechanism, or the default if none exists.
*/
public DownsamplingMethod getDownsamplingMethod() {
if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
@ -153,16 +142,26 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
}
/**
* Set the downsampling method stored in the argument collection so that it is read back out when interrogating the command line arguments.
* @param method The downsampling mechanism.
*/
public void setDownsamplingMethod(DownsamplingMethod method) {
if (method == null)
throw new IllegalArgumentException("method is null");
downsamplingType = method.type;
downsampleCoverage = method.toCoverage;
downsampleFraction = method.toFraction;
}
// --------------------------------------------------------------------------------------------------------------
//
// BAQ arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "baq", shortName="baq", doc="Type of BAQ calculation to apply in the engine", required = false)
public BAQ.CalculationMode BAQMode = BAQ.CalculationMode.OFF;
@Element(required = false)
@Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled). Default value is 40. 30 is perhaps better for whole genome call sets", required = false)
public double BAQGOP = BAQ.DEFAULT_GOP;
@ -171,7 +170,6 @@ public class GATKArgumentCollection {
// performance log arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
public File performanceLog = null;
@ -184,67 +182,117 @@ public class GATKArgumentCollection {
return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
}
@Element(required = false)
@Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
public Boolean useOriginalBaseQualities = false;
@Argument(fullName="defaultBaseQualities", shortName = "DBQ", doc = "If reads are missing some or all base quality scores, this value will be used for all base quality scores", required=false)
public byte defaultBaseQualities = -1;
@Element(required = false)
@Argument(fullName = "validation_strictness", shortName = "S", doc = "How strict should we be with validation", required = false)
public SAMFileReader.ValidationStringency strictnessLevel = SAMFileReader.ValidationStringency.SILENT;
@Element(required = false)
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;
/** How many threads should be allocated to this analysis. */
@Element(required = false)
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis", required = false)
public int numberOfThreads = 1;
/** What rule should we use when merging intervals */
@Element(required = false)
@Argument(fullName = "interval_merging", shortName = "im", doc = "What interval merging rule should we use.", required = false)
public IntervalMergingRule intervalMerging = IntervalMergingRule.ALL;
@ElementList(required = false)
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line", required = false)
public List<String> readGroupBlackList = null;
// --------------------------------------------------------------------------------------------------------------
//
// distributed GATK arguments
// PED (pedigree) support
//
// --------------------------------------------------------------------------------------------------------------
@Element(required=false)
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
@Hidden
public File processingTrackerFile = null;
@Element(required=false)
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false)
@Hidden
public boolean restartProcessingTracker = false;
/**
* <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
* processed in the GATK.</p>
*
* <ul>
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
* </ul>
*
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
*
* <ul>
* <li>Family ID</li>
* <li>Individual ID</li>
* <li>Paternal ID</li>
* <li>Maternal ID</li>
* <li>Sex (1=male; 2=female; other=unknown)</li>
* <li>Phenotype</li>
* </ul>
*
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: GATK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
*
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
*
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.</p>
*
* <p>Affection status should be coded:</p>
*
* <ul>
* <li>-9 missing</li>
* <li>0 missing</li>
* <li>1 unaffected</li>
* <li>2 affected</li>
* </ul>
*
* <p>If any value outside of -9,0,1,2 is detected than the samples are assumed
* to phenotype values are interpreted as string phenotype values. In this case -9 uniquely
* represents the missing value.</p>
*
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
*
* <p>For example, here are two individuals (one row = one person):</p>
*
* <pre>
* FAM001 1 0 0 1 2
* FAM001 2 0 0 1 2
* </pre>
*
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
*
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
* data should clearly indicate so in their arguments and will throw errors if required pedigree
* information is missing.</p>
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List<File> pedigreeFiles = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
@Hidden
public File processingTrackerStatusFile = null;
/**
* Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
* as -ped supports
*/
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
public List<String> pedigreeStrings = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false)
@Hidden
public int processTrackerID = -1;
/**
* How strict should we be in parsing the PED files?
*/
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
//
// BAM indexing and sharding arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
@Hidden
public boolean allowIntervalsWithUnindexedBAM = false;
@Element(required = false)
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality.",required=false)
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality",required=false)
public boolean disableLowMemorySharding = false;
// --------------------------------------------------------------------------------------------------------------
@ -253,69 +301,6 @@ public class GATKArgumentCollection {
//
// --------------------------------------------------------------------------------------------------------------
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the file to write to
*/
public static void marshal(GATKArgumentCollection collection, String outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File result = new File(outputFile);
try {
serializer.write(collection, result);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* marshal the data out to a object
*
* @param collection the GATKArgumentCollection to load into
* @param outputFile the stream to write to
*/
public static void marshal(GATKArgumentCollection collection, PrintStream outputFile) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
serializer.write(collection, outputFile);
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data to the file " + outputFile, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param filename the filename to marshal from
*/
public static GATKArgumentCollection unmarshal(String filename) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
File source = new File(filename);
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, source);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + filename, e);
}
}
/**
* unmashall the object from a configuration file
*
* @param file the inputstream to marshal from
*/
public static GATKArgumentCollection unmarshal(InputStream file) {
Serializer serializer = new Persister(new Format(new HyphenStyle()));
try {
GATKArgumentCollection example = serializer.read(GATKArgumentCollection.class, file);
return example;
} catch (Exception e) {
throw new ReviewedStingException("Failed to marshal the data from file " + file.toString(), e);
}
}
/**
* test equality between two arg collections. This function defines the statement:
* "not fun to write"
@ -363,7 +348,7 @@ public class GATKArgumentCollection {
if (!other.referenceFile.equals(this.referenceFile)) {
return false;
}
if (!other.intervals.equals(this.intervals)) {
if ((other.intervals == null && this.intervals != null) || !other.intervals.equals(this.intervals)) {
return false;
}
if (!other.excludeIntervals.equals(this.excludeIntervals)) {
@ -386,39 +371,21 @@ public class GATKArgumentCollection {
if (other.intervalMerging != this.intervalMerging) {
return false;
}
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
if (other.phoneHomeType != this.phoneHomeType) {
return false;
}
if (BTIMergeRule != other.BTIMergeRule)
if (intervalSetRule != other.intervalSetRule)
return false;
if ( BAQMode != other.BAQMode) return false;
if ( BAQMode != other.BAQMode ) return false;
if ( BAQGOP != other.BAQGOP ) return false;
if ((other.performanceLog == null && this.performanceLog != null) ||
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false;
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
return false;
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
return false;
if ( restartProcessingTracker != other.restartProcessingTracker )
return false;
if ( processTrackerID != other.processTrackerID )
return false;
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false;

View File

@ -28,13 +28,11 @@ package org.broadinstitute.sting.gatk.arguments;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.simpleframework.xml.Root;
/**
* @author ebanks
* @version 1.0
*/
@Root
public class StandardVariantContextInputArgumentCollection {
/**

View File

@ -37,7 +37,6 @@ public class ValidationExclusion {
public enum TYPE {
ALLOW_UNINDEXED_BAM, // allow bam files that do not have an index; we'll traverse them using monolithic shard
ALLOW_EMPTY_INTERVAL_LIST, // allow the user to pass in an empty interval list
ALLOW_UNSET_BAM_SORT_ORDER, // assume that the bam is sorted, even if the SO (sort-order) flag is not set
NO_READ_ORDER_VERIFICATION, // do not validate that the reads are in order as we take them from the bam file
ALLOW_SEQ_DICT_INCOMPATIBILITY, // allow dangerous, but not fatal, sequence dictionary incompabilities

View File

@ -25,12 +25,12 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileup;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
@ -130,7 +130,7 @@ public class AlignmentContext implements HasGenomeLocation {
*/
@Deprecated
//todo: unsafe and tailored for current usage only; both pileups can be null or worse, bot can be not null in theory
public List<SAMRecord> getReads() { return ( basePileup.getReads() ); }
public List<GATKSAMRecord> getReads() { return ( basePileup.getReads() ); }
/**
* Are there any reads associated with this locus?
@ -138,7 +138,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return
*/
public boolean hasReads() {
return basePileup != null && basePileup.size() > 0 ;
return basePileup != null && basePileup.getNumberOfElements() > 0 ;
}
/**
@ -146,7 +146,7 @@ public class AlignmentContext implements HasGenomeLocation {
* @return
*/
public int size() {
return basePileup.size();
return basePileup.getNumberOfElements();
}
/**

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
return splitContextBySampleName(context, null);
}
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
m.put(new Sample(entry.getKey()), entry.getValue());
}
return m;
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
* of sample object.
@ -97,11 +88,11 @@ public class AlignmentContextUtils {
GenomeLoc loc = context.getLocation();
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
for(String sample: context.getPileup().getSampleNames()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample);
for(String sample: context.getPileup().getSamples()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)
if(pileupBySample.getNumberOfElements() == 0)
continue;
if(sample != null)

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.datasources.providers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.iterators.GenomeLocusIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.Collections;
import java.util.List;
@ -132,7 +132,7 @@ public class AllLocusView extends LocusView {
* @param site Site at which to create the blank locus context.
* @return empty context.
*/
private final static List<SAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<GATKSAMRecord> EMPTY_PILEUP_READS = Collections.emptyList();
private final static List<Integer> EMPTY_PILEUP_OFFSETS = Collections.emptyList();
private AlignmentContext createEmptyLocus( GenomeLoc site ) {
return new AlignmentContext(site,new ReadBackedPileupImpl(site, EMPTY_PILEUP_READS, EMPTY_PILEUP_OFFSETS));

View File

@ -59,7 +59,7 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}

View File

@ -43,6 +43,7 @@ import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSamRecordFactory;
import java.io.File;
import java.lang.reflect.InvocationTargetException;
@ -57,6 +58,8 @@ import java.util.*;
* Converts shards to SAM iterators over the specified region
*/
public class SAMDataSource {
final private static GATKSamRecordFactory factory = new GATKSamRecordFactory();
/** Backing support for reads. */
protected final ReadProperties readProperties;
@ -235,6 +238,12 @@ public class SAMDataSource {
for(SAMFileReader reader: readers.values()) {
// Get the sort order, forcing it to coordinate if unsorted.
SAMFileHeader header = reader.getFileHeader();
if ( header.getReadGroups().isEmpty() ) {
throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile,
"SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups");
}
SAMFileHeader.SortOrder sortOrder = header.getSortOrder() != SAMFileHeader.SortOrder.unsorted ? header.getSortOrder() : SAMFileHeader.SortOrder.coordinate;
// Validate that all input files are sorted in the same order.
@ -638,7 +647,9 @@ public class SAMDataSource {
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
byte defaultBaseQualities) {
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
if ( useOriginalBaseQualities || defaultBaseQualities >= 0 )
// only wrap if we are replacing the original qualities or using a default base quality
wrappedIterator = new ReadFormattingIterator(wrappedIterator, useOriginalBaseQualities, defaultBaseQualities);
// NOTE: this (and other filtering) should be done before on-the-fly sorting
// as there is no reason to sort something that we will end up throwing away
@ -750,6 +761,7 @@ public class SAMDataSource {
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile);
reader.setSAMRecordFactory(factory);
reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding)

View File

@ -97,7 +97,7 @@ public class FindLargeShards extends CommandLineProgram {
// intervals
GenomeLocSortedSet intervalSortedSet = null;
if(intervals != null)
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals, true), IntervalMergingRule.ALL);
intervalSortedSet = IntervalUtils.sortAndMergeIntervals(genomeLocParser, IntervalUtils.parseIntervalArguments(genomeLocParser, intervals), IntervalMergingRule.ALL);
else {
intervalSortedSet = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord entry: refReader.getSequenceDictionary().getSequences())

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 2:09:16 PM
*/
/**
 * Bean describing the set of legal values for a single sample property,
 * as declared in a sample metadata file. Instances are populated
 * reflectively by the YAML loader, so the mutable bean shape is required.
 */
public class PropertyDefinition {

    /** Name of the property being constrained. */
    String property;

    /** The complete list of values this property may take. */
    String[] values;

    /** @return the name of the constrained property. */
    public String getProperty() {
        return this.property;
    }

    /** @return every value the property is permitted to hold. */
    public String[] getValues() {
        return this.values;
    }

    /** @param property name of the property to constrain. */
    public void setProperty(String property) {
        this.property = property;
    }

    /** @param values the allowed values for the property. */
    public void setValues(String[] values) {
        this.values = values;
    }
}

View File

@ -1,203 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:31:38 PM
*/
/**
 * A single sample and its associated metadata: arbitrary key/value properties
 * (e.g. "gender", "population") and relationships to other samples
 * (e.g. "mother", "father").
 */
public class Sample implements java.io.Serializable {

    /** Unique identifier for this sample; may be null (placeholder sample). */
    private final String id;

    /** True if this sample has an entry in a sample metadata file. */
    private boolean hasSampleFileEntry = false;

    /** True if this sample has an entry in the SAM file. */
    private boolean hasSAMFileEntry = false;

    /** Arbitrary key -> value metadata for this sample. */
    private HashMap<String, Object> properties = new HashMap<String, Object>();

    /** Relationship name -> related sample (e.g. "mother" -> mother's Sample). */
    private HashMap<String, Sample> relationships = new HashMap<String, Sample>();

    public enum Gender {
        MALE,
        FEMALE,
        UNKNOWN
    }

    /**
     * @param id sample identifier; null is deliberately permitted — the data
     *           source registers a placeholder sample under the null id.
     */
    public Sample(String id) {
        this.id = id;
    }

    /** @return this sample's unique id (may be null for the placeholder sample). */
    public String getId() {
        return this.id;
    }

    /**
     * @return the live (mutable) property map for this sample.
     */
    public Map<String, Object> getProperties() {
        return properties;
    }

    /**
     * Replaces all properties with a copy of the given map.
     * Copying (rather than the previous raw {@code (HashMap)} cast) avoids a
     * ClassCastException when a non-HashMap Map implementation is passed in.
     *
     * @param properties new property map; entries are copied
     */
    public void setProperties(Map<String, Object> properties) {
        this.properties = new HashMap<String, Object>(properties);
    }

    /** @return an unmodifiable view of this sample's relationships. */
    public Map<String,Sample> getRelationships() {
        return Collections.unmodifiableMap(this.relationships);
    }

    /** @param value whether this sample appears in a sample metadata file */
    public void setSampleFileEntry(boolean value) {
        this.hasSampleFileEntry = value;
    }

    /** @return true if this sample has an entry in the SAM file */
    public boolean hasSAMFileEntry() {
        return this.hasSAMFileEntry;
    }

    /** @param value whether this sample appears in the SAM file */
    public void setSAMFileEntry(boolean value) {
        this.hasSAMFileEntry = value;
    }

    /** @return true if this sample has an entry in a sample metadata file */
    public boolean hasSampleFileEntry() {
        return this.hasSampleFileEntry;
    }

    /**
     * Get one property
     * @param key key of property
     * @return value of property as generic object, or null if not set
     */
    public Object getProperty(String key) {
        return properties.get(key);
    }

    /**
     * Set a property.
     * If the property already exists, it is overwritten.
     *
     * @param key key of property
     * @param value object to be stored in the properties map
     * @throws StingException if the key collides with a relationship, or a
     *         special property ("gender", "population") has the wrong type
     */
    public void setProperty(String key, Object value) {
        if (relationships.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        if (key.equals("gender") && value.getClass() != Gender.class) {
            throw new StingException("'gender' property must be of type Sample.Gender");
        }
        if (key.equals("population") && value.getClass() != String.class) {
            throw new StingException("'population' property must be of type String");
        }
        properties.put(key, value);
    }

    /**
     * Get one relationship
     * @param key of relationship
     * @return Sample object that this relationship points to, or null
     */
    public Sample getRelationship(String key) {
        return relationships.get(key);
    }

    /**
     * Set one relationship.
     * If already set, it is overwritten.
     *
     * @param key key of the relationship
     * @param value Sample object this relationship points to
     * @throws StingException if the key collides with an existing property
     */
    public void setRelationship(String key, Sample value) {
        if (properties.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        relationships.put(key, value);
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship "mother", if it exists, or null
     */
    public Sample getMother() {
        return getRelationship("mother");
    }

    /**
     * Get the sample's father
     * @return sample object with relationship "father", if it exists, or null
     */
    public Sample getFather() {
        return getRelationship("father");
    }

    /**
     * Get gender of the sample
     * @return property of key "gender" - must be of type Gender
     */
    public Gender getGender() {
        return (Gender) properties.get("gender");
    }

    /** @return the "population" property, or null if unset */
    public String getPopulation() {
        return (String) properties.get("population");
    }

    /** @return the "familyId" property, or null if unset */
    public String getFamilyId() {
        return (String) properties.get("familyId");
    }

    /**
     * @return True if sample is male, false if female, unknown, or null
     */
    public boolean isMale() {
        return properties.get("gender") == Gender.MALE;
    }

    /**
     * @return True if sample is female, false if male, unknown or null
     */
    public boolean isFemale() {
        // Bug fix: this previously compared against Gender.MALE, so it could
        // never return true for a female sample.
        return properties.get("gender") == Gender.FEMALE;
    }

    /**
     * @param key property key
     * @return true if sample has this property (even if its value is null)
     */
    public boolean hasProperty(String key) {
        return properties.containsKey(key);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Sample sample = (Sample) o;
        if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
        if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
        if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
        if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
        if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
            return false;
        return true;
    }

    @Override
    public int hashCode() {
        // "".hashCode() is 0; kept for byte-compatibility with prior behavior.
        return id != null ? id.hashCode() : "".hashCode();
    }
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 5:13:46 PM
*/
public class SampleAlias {
String mainId;
String[] otherIds;
public String getMainId() {
return mainId;
}
public void setMainId(String mainId) {
this.mainId = mainId;
}
public String[] getOtherIds() {
return otherIds;
}
public void setOtherIds(String[] otherIds) {
this.otherIds = otherIds;
}
}

View File

@ -1,590 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
*
*/
public class SampleDataSource {
    /**
     * SAMFileHeader that has been created for this analysis.
     */
    private SAMFileHeader header;

    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Maps ID => primary ID for all sample ID strings - both primary IDs and aliases.
     * NOTE(review): aliases are validated by validateAliases() but this map is never
     * populated from the parsed files, so aliases currently have no effect - confirm intent.
     */
    private HashMap<String, String> sampleAliases = new HashMap<String, String>();

    /**
     * While loading sample files, we must be aware of "special" properties and relationships that are always allowed
     */
    public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};

    public static final String[] specialRelationships = new String[] {"mother", "father"};

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     * @param header SAMFileHeader that has been created for this analysis
     * @param sampleFiles Sample files that were included on the command line (may be null)
     */
    public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
        this();
        this.header = header;

        // create an empty sample object for each sample referenced in the SAM header
        for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
            if (!hasSample(sampleName)) {
                Sample newSample = new Sample(sampleName);
                newSample.setSAMFileEntry(true);
                samples.put(sampleName, newSample);
            }
        }

        // add files consecutively
        if (sampleFiles != null) {
            for (File file : sampleFiles) {
                addFile(file);
            }
        }
    }

    public SampleDataSource() {
        // placeholder sample for reads/genotypes that carry no sample name
        samples.put(null, new Sample(null));
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them
     */
    public void addSamplesFromSAMHeader(SAMFileHeader header) {
        for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
            if (!hasSample(sampleName)) {
                Sample newSample = new Sample(sampleName);
                newSample.setSAMFileEntry(true);
                samples.put(sampleName, newSample);
            }
        }
    }

    /**
     * Parse one sample file and integrate it with samples that are already there.
     * Fails quickly if any errors are found in the file.
     *
     * @param sampleFile YAML sample metadata file to load
     * @throws StingException on I/O errors, YAML syntax errors, or any
     *         property/relationship that is disallowed or conflicting
     */
    public void addFile(File sampleFile) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(sampleFile));
        }
        catch (IOException e) {
            throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
        }

        // set up YAML reader - a "Constructor" creates a java object from YAML and a "Loader" loads the file
        Constructor con = new Constructor(SampleFileParser.class);
        TypeDescription desc = new TypeDescription(SampleFileParser.class);
        desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
        desc.putListPropertyType("sampleAliases", SampleAlias.class);
        con.addTypeDescription(desc);
        Yaml yaml = new Yaml(con);

        // SampleFileParser stores an object representation of a sample file - this is what we'll parse
        SampleFileParser parser;
        try {
            parser = (SampleFileParser) yaml.load(reader);
        }
        catch (Exception e) {
            throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
        }
        finally {
            // Bug fix: the reader was previously never closed (file handle leak).
            try {
                reader.close();
            }
            catch (IOException e) {
                // best effort: the file has already been fully read (or failed to parse)
            }
        }

        // check to see which validation options were built into the file
        boolean restrictProperties = parser.getAllowedProperties() != null;
        boolean restrictRelationships = parser.getAllowedRelationships() != null;
        boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;

        // propertyValues stores the values that are allowed for a given property
        HashMap<String, HashSet> propertyValues = null;
        if (restrictPropertyValues) {
            propertyValues = new HashMap<String, HashSet>();
            for (PropertyDefinition def : parser.getPropertyDefinitions()) {
                HashSet<String> set = new HashSet<String>(Arrays.asList(def.getValues()));
                propertyValues.put(def.getProperty(), set);
            }
        }

        // make sure the aliases are valid
        validateAliases(parser);

        // loop through each sample in the file - a SampleParser stores an object that will become a Sample
        for (SampleParser sampleParser : parser.getSamples()) {
            try {
                // step 1: add the sample if it doesn't already exist
                Sample sample = getSampleById(sampleParser.getId());
                if (sample == null) {
                    sample = new Sample(sampleParser.getId());
                }
                addSample(sample);
                sample.setSampleFileEntry(true);

                // step 2: add the properties
                if (sampleParser.getProperties() != null) {
                    for (String property : sampleParser.getProperties().keySet()) {
                        // check that the property is allowed
                        if (restrictProperties && !isPropertyValid(property, parser.getAllowedProperties())) {
                            throw new StingException(property + " is an invalid property. It is not included in the list " +
                                    "of allowed properties.");
                        }
                        // next check that the value is allowed
                        if (restrictPropertyValues && !isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
                            throw new StingException("The value of property '" + property + "' is invalid. " +
                                    "It is not included in the list of allowed values for this property.");
                        }
                        // next check that there isn't already a conflicting value for this property.
                        // Bug fix: conflicts were previously detected with reference equality (!=),
                        // which spuriously flagged equal-but-distinct strings as conflicts.
                        Object newValue = sampleParser.getProperties().get(property);
                        Object existingValue = sample.getProperty(property);
                        if (existingValue != null && !existingValue.equals(newValue)) {
                            throw new StingException(property + " is a conflicting property!");
                        }
                        // checks are passed - now add the property!
                        saveProperty(sample, property, newValue);
                    }
                }

                // step 3: add the relationships
                if (sampleParser.getRelationships() != null) {
                    for (String relationship : sampleParser.getRelationships().keySet()) {
                        String relativeId = sampleParser.getRelationships().get(relationship);
                        if (relativeId == null) {
                            throw new StingException("The relationship cannot be null");
                        }
                        // first check that it's not invalid
                        if (restrictRelationships && !isRelationshipValid(relationship, parser.getAllowedRelationships())) {
                            throw new StingException(relationship + " is an invalid relationship");
                        }
                        // next check that there isn't already a conflicting relationship.
                        // Bug fix: the conflict check previously consulted getProperties()
                        // (the wrong map) and compared the String ids by reference.
                        Sample existingRelative = sample.getRelationship(relationship);
                        if (existingRelative != null) {
                            if (!relativeId.equals(existingRelative.getId())) {
                                throw new StingException(relationship + " is a conflicting relationship!");
                            }
                            // the relationship is already set and consistent - no need to continue
                            continue;
                        }
                        // checks are passed - now save the relationship
                        saveRelationship(sample, relationship, relativeId);
                    }
                }
            } catch (Exception e) {
                throw new StingException("An error occurred while loading this sample from the sample file: " +
                        sampleParser.getId(), e);
            }
        }
    }

    /**
     * @param key property key
     * @param value candidate value for the property
     * @param valuesList map of property key => set of allowed String values
     * @return true if the value is permitted for this property
     */
    private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
        // if the property values weren't specified for this property, then any value is okay
        if (!valuesList.containsKey(key)) {
            return true;
        }
        // if this property has enumerated values, it must be a string
        else if (value.getClass() != String.class)
            return false;
        // is the value specified or not?
        else if (!valuesList.get(key).contains(value))
            return false;
        return true;
    }

    /**
     * Makes sure that the aliases are valid.
     * Checks that 1) no string is used as both a main ID and an alias;
     * 2) no alias is used more than once
     * @param parser parsed sample file to validate
     */
    private void validateAliases(SampleFileParser parser) {
        // a file with no aliases is trivially valid
        if (parser.getSampleAliases() == null)
            return;

        HashSet<String> mainIds = new HashSet<String>();
        HashSet<String> otherIds = new HashSet<String>();

        for (SampleAlias sampleAlias : parser.getSampleAliases()) {
            mainIds.add(sampleAlias.getMainId());
            for (String otherId : sampleAlias.getOtherIds()) {
                if (mainIds.contains(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
                            "be both a main ID and an other ID", otherId));
                if (!otherIds.add(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
                            "alias more than once.", otherId));
            }
        }
    }

    /**
     * @param property property key to check
     * @param allowedProperties file-declared whitelist of property keys
     * @return true if the property is special (always allowed) or whitelisted
     */
    private boolean isPropertyValid(String property, String[] allowedProperties) {
        // is it a special property that is always allowed?
        for (String allowedProperty : specialProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        // is it in the allowed properties list?
        for (String allowedProperty : allowedProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        return false;
    }

    /**
     * @param relationship relationship key to check
     * @param allowedRelationships file-declared whitelist of relationship keys
     * @return true if the relationship is special (always allowed) or whitelisted
     */
    private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
        // is it a special relationship that is always allowed?
        for (String allowedRelationship : specialRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        // is it in the allowed relationships list?
        for (String allowedRelationship : allowedRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        return false;
    }

    /**
     * Saves a property, converting it to the correct type where necessary.
     * @param sample sample to store the property on
     * @param key property key
     * @param value property value, as read from the YAML parser
     */
    private void saveProperty(Sample sample, String key, Object value) {
        // convert gender to the right type, if it was stored as a String
        if (key.equals("gender")) {
            if (((String) value).toLowerCase().equals("male")) {
                value = Sample.Gender.MALE;
            }
            else if (((String) value).toLowerCase().equals("female")) {
                value = Sample.Gender.FEMALE;
            }
            else if (((String) value).toLowerCase().equals("unknown")) {
                value = Sample.Gender.UNKNOWN;
            }
            else if (value != null) {
                throw new StingException("'gender' property must be male, female, or unknown.");
            }
        }
        try {
            sample.setProperty(key, value);
        }
        catch (Exception e) {
            throw new StingException("Could not save property " + key, e);
        }
    }

    /**
     * Saves a relationship, creating the relative's Sample object if necessary.
     * @param sample sample to store the relationship on
     * @param key relationship key
     * @param relativeId sample ID string of the relative
     */
    private void saveRelationship(Sample sample, String key, String relativeId) {
        // get the reference that we'll store as the value
        Sample relative = getSampleById(relativeId);

        // create a sample object for the relative, if necessary
        if (relative == null) {
            relative = new Sample(relativeId);
            addSample(relative);
        }

        sample.setRelationship(key, relative);
    }

    /**
     * Filter a sample name in case it is an alias
     * @param sampleId to be filtered
     * @return ID of sample that stores data for this alias
     */
    private String aliasFilter(String sampleId) {
        if (!sampleAliases.containsKey(sampleId))
            return sampleId;
        else
            return sampleAliases.get(sampleId);
    }

    /**
     * Add a sample to the collection
     * @param sample to be added
     */
    private void addSample(Sample sample) {
        samples.put(sample.getId(), sample);
    }

    /**
     * Check if sample with this ID exists.
     * Note that this will return true if the name passed in is an alias.
     * @param id ID of sample to be checked
     * @return true if sample exists; false if not
     */
    public boolean hasSample(String id) {
        return samples.get(aliasFilter(id)) != null;
    }

    /**
     * Get a sample by its ID.
     * If an alias is passed in, return the main sample object.
     * @param id sample id or alias
     * @return sample object with this ID, or null if not found
     */
    public Sample getSampleById(String id) {
        return samples.get(aliasFilter(id));
    }

    /**
     * Get the sample for a given read group.
     * Must first look up the ID for the read group.
     * @param readGroup of sample
     * @return sample object with ID from the read group
     */
    public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
        String nameFromReadGroup = readGroup.getSample();
        return getSampleById(nameFromReadGroup);
    }

    /**
     * Get a sample for a given read.
     * Must first look up the read group, and then the sample ID for that read group.
     * @param read of sample
     * @return sample object of this read
     */
    public Sample getSampleByRead(SAMRecord read) {
        return getSampleByReadGroup(read.getReadGroup());
    }

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /**
     * Return all samples with a given family ID.
     * Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this.
     * @param familyId family to look up
     * @return all samples whose "familyId" property equals the given id
     */
    public Set<Sample> getFamily(String familyId) {
        HashSet<Sample> familyMembers = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.getFamilyId() != null) {
                if (sample.getFamilyId().equals(familyId))
                    familyMembers.add(sample);
            }
        }
        return familyMembers;
    }

    /**
     * Returns all children of a given sample.
     * See the note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     * @param sample potential parent
     * @return all samples in the same family whose mother or father is this sample
     */
    public Set<Sample> getChildren(Sample sample) {
        HashSet<Sample> children = new HashSet<Sample>();
        for (Sample familyMember : getFamily(sample.getFamilyId())) {
            if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
                children.add(familyMember);
            }
        }
        return children;
    }

    /** @return a new set containing every known sample (including the placeholder). */
    public Set<Sample> getSamples() {
        HashSet<Sample> set = new HashSet<Sample>();
        set.addAll(samples.values());
        return set;
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names there will not be any duplicates in the returned set.
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        HashSet<Sample> samples = new HashSet<Sample>();
        for (String name : sampleNameList) {
            try {
                samples.add(getSampleById(name));
            }
            catch (Exception e) {
                throw new StingException("Could not get sample with the following ID: " + name, e);
            }
        }
        return samples;
    }

    /**
     * Returns a set of samples that have any value (which could be null) for a given property
     * @param key Property key
     * @return Set of samples with the property
     */
    public Set<Sample> getSamplesWithProperty(String key) {
        HashSet<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : samples.values()) {
            if (s.hasProperty(key))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Returns a set of samples that have a property with a certain value.
     * Value must be a string for now - could add a similar method for matching any objects in the future.
     *
     * @param key Property key
     * @param value String property value
     * @return Set of samples that match key and value
     */
    public Set<Sample> getSamplesWithProperty(String key, String value) {
        // Bug fix: this previously removed elements from a HashSet while
        // iterating over it with for-each, which throws
        // ConcurrentModificationException; build the result set instead.
        // Comparing value-first is also null-safe for properties stored as null.
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : getSamplesWithProperty(key)) {
            if (value.equals(s.getProperty(key)))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Get the sample with this id, creating and registering it first if it does not exist.
     * @param id sample id
     * @return existing or newly-created sample
     */
    public Sample getOrCreateSample(String id) {
        Sample sample = getSampleById(id);
        if (sample == null) {
            sample = new Sample(id);
            addSample(sample);
        }
        return sample;
    }

    /**
     * Returns all samples that were referenced in the SAM file
     */
    public Set<Sample> getSAMFileSamples() {
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.hasSAMFileEntry())
                toReturn.add(sample);
        }
        return toReturn;
    }

    /**
     * Returns a set of sample objects for the sample names in a variant context
     *
     * @param context Any variant context
     * @return a set of the sample objects
     */
    public Set<Sample> getSamplesByVariantContext(VariantContext context) {
        Set<Sample> samples = new HashSet<Sample>();
        for (String sampleName : context.getSampleNames()) {
            samples.add(getOrCreateSample(sampleName));
        }
        return samples;
    }

    /**
     * Return a subcontext restricted to samples with a given property key/value.
     * Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering.
     * @param context VariantContext to filter
     * @param key property key
     * @param value property value (must be string)
     * @return subcontext
     */
    public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
        Set<String> samplesWithProperty = new HashSet<String>();
        for (String sampleName : context.getSampleNames()) {
            Sample s = samples.get(sampleName);
            if (s != null && s.hasProperty(key) && s.getProperty(key).equals(value))
                samplesWithProperty.add(sampleName);
        }
        Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
        return context.subContextFromGenotypes(genotypes.values());
    }

    /** @return a data source backed by an empty SAM header and no sample files. */
    public static SampleDataSource createEmptyDataSource() {
        SAMFileHeader header = new SAMFileHeader();
        return new SampleDataSource(header, null);
    }
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 1:30:44 PM
*/
/**
 * Bean representation of an entire YAML sample metadata file: the declared
 * aliases, the validation whitelists, the property value definitions, and the
 * per-sample entries. Populated reflectively by the YAML loader, so the
 * mutable bean shape (no-arg constructor plus getters/setters) is required.
 */
public class SampleFileParser {

    /** Alias declarations mapping alternative ids onto primary ids. */
    private SampleAlias[] sampleAliases;

    /** Whitelist of property keys permitted in this file (null = unrestricted). */
    private String[] allowedProperties;

    /** Whitelist of relationship keys permitted in this file (null = unrestricted). */
    private String[] allowedRelationships;

    /** Per-property enumerations of permitted values (null = unrestricted). */
    private PropertyDefinition[] propertyDefinitions;

    /** The sample entries themselves. */
    private SampleParser[] samples;

    public SampleFileParser() {
    }

    /** @return the alias declarations, or null if the file declared none. */
    public SampleAlias[] getSampleAliases() {
        return this.sampleAliases;
    }

    /** @param sampleAliases alias declarations for this file. */
    public void setSampleAliases(SampleAlias[] sampleAliases) {
        this.sampleAliases = sampleAliases;
    }

    /** @return the whitelist of allowed property keys, or null. */
    public String[] getAllowedProperties() {
        return this.allowedProperties;
    }

    /** @param allowedProperties whitelist of allowed property keys. */
    public void setAllowedProperties(String[] allowedProperties) {
        this.allowedProperties = allowedProperties;
    }

    /** @return the whitelist of allowed relationship keys, or null. */
    public String[] getAllowedRelationships() {
        return this.allowedRelationships;
    }

    /** @param allowedRelationships whitelist of allowed relationship keys. */
    public void setAllowedRelationships(String[] allowedRelationships) {
        this.allowedRelationships = allowedRelationships;
    }

    /** @return the per-property value definitions, or null. */
    public PropertyDefinition[] getPropertyDefinitions() {
        return this.propertyDefinitions;
    }

    /** @param propertyDefinitions per-property value definitions. */
    public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
        this.propertyDefinitions = propertyDefinitions;
    }

    /** @return the sample entries declared in this file. */
    public SampleParser[] getSamples() {
        return this.samples;
    }

    /** @param samples the sample entries declared in this file. */
    public void setSamples(SampleParser[] samples) {
        this.samples = samples;
    }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 2:09:43 PM
*/
/**
 * Bean representation of a single sample entry as it appears in a YAML sample
 * metadata file; later converted into a Sample object. Populated reflectively
 * by the YAML loader, so the mutable bean shape is required.
 */
public class SampleParser {

    /** The sample's identifier. */
    private String id;

    /** Raw property key -> value pairs for the sample. */
    private HashMap<String, Object> properties;

    /** Relationship name -> related sample id (e.g. "mother" -> "NA12892"). */
    private HashMap<String, String> relationships;

    /** @return the sample's identifier. */
    public String getId() {
        return this.id;
    }

    /** @param id the sample's identifier. */
    public void setId(String id) {
        this.id = id;
    }

    /** @return the raw property map for this sample. */
    public HashMap<String, Object> getProperties() {
        return this.properties;
    }

    /** @param properties raw property map for this sample. */
    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }

    /** @return the relationship-name to related-sample-id map. */
    public HashMap<String, String> getRelationships() {
        return this.relationships;
    }

    /** @param relationships relationship-name to related-sample-id map. */
    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }
}

View File

@ -85,12 +85,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
if (engine.getArguments().processingTrackerFile != null) {
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
}
}
public Object execute( Walker walker, ShardStrategy shardStrategy ) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.Collection;
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -62,16 +62,17 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
ShardDataProvider dataProvider = null;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
final ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),microScheduler.getEngine().getGenomeLocParser(),iterator.getLocus(),iterator,microScheduler.reference,microScheduler.rods);
accumulator = traversalEngine.traverse( walker, dataProvider, accumulator );
dataProvider.close();
}
if (dataProvider != null) dataProvider.close();
windowMaker.close();
outputMergeTask = outputTracker.closeStorage();

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals.
* @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with
* @param sampleNames The complete set of sample names in the reads in shard
*/
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) {
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() {
return this;
}

View File

@ -27,7 +27,9 @@ package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* Filter out malformed reads.
@ -37,14 +39,25 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
*/
public class MalformedReadFilter extends ReadFilter {
private SAMFileHeader header;
@Argument(fullName = "filter_mismatching_base_and_quals", shortName = "filterMBQ", doc = "if a read has mismatching number of bases and base qualities, filter out the read instead of blowing up.", required = false)
boolean filterMismatchingBaseAndQuals = false;
@Override
public void initialize(GenomeAnalysisEngine engine) {
this.header = engine.getSAMFileHeader();
}
public boolean filterOut(SAMRecord read) {
return !checkInvalidAlignmentStart(read) ||
// slowly changing the behavior to blow up first and filtering out if a parameter is explicitly provided
if (!checkMismatchingBasesAndQuals(read)) {
if (!filterMismatchingBaseAndQuals)
throw new UserException.MalformedBAM(read, "BAM file has a read with mismatching number of bases and base qualities. Offender: " + read.getReadName() +" [" + read.getReadLength() + " bases] [" +read.getBaseQualities().length +"] quals");
else
return true;
}
return !checkInvalidAlignmentStart(read) ||
!checkInvalidAlignmentEnd(read) ||
!checkAlignmentDisagreesWithHeader(this.header,read) ||
!checkCigarDisagreesWithAlignment(read);
@ -108,4 +121,13 @@ public class MalformedReadFilter extends ReadFilter {
return false;
return true;
}
/**
 * Tests whether a read's base sequence and its base-quality array have the same length.
 * @param read the read to validate
 * @return true if the base count equals the quality count; false otherwise
 */
private static boolean checkMismatchingBasesAndQuals(SAMRecord read) {
    final int baseCount = read.getReadLength();
    final int qualCount = read.getBaseQualities().length;
    return baseCount == qualCount;
}
}

View File

@ -0,0 +1,23 @@
package org.broadinstitute.sting.gatk.filters;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Argument;
/**
 * Read filter that keeps only reads whose name exactly equals the required
 * --readName argument; every other read is filtered out.
 *
 * author: chartl, 9/19/11
 */
public class ReadNameFilter extends ReadFilter {
// the single read name to keep; injected from the command line (required)
@Argument(fullName = "readName", shortName = "rn", doc="Filter out all reads except those with this read name", required=true)
private String readName;

/**
 * @param rec the read to test
 * @return true (i.e. filter the read out) unless its name equals the configured readName
 */
public boolean filterOut(final SAMRecord rec) {
return ! rec.getReadName().equals(readName);
}
}

View File

@ -46,7 +46,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
else if ( stub.getOutputStream() != null ) {
this.file = null;
this.stream = stub.getOutputStream();
writer = new StandardVCFWriter(stream, stub.doNotWriteGenotypes());
writer = new StandardVCFWriter(stream, stub.getMasterSequenceDictionary(), stub.doNotWriteGenotypes());
}
else
throw new ReviewedStingException("Unable to create target to which to write; storage was provided with neither a file nor a stream.");
@ -71,7 +71,7 @@ public class VCFWriterStorage implements Storage<VCFWriterStorage>, VCFWriter {
}
// The GATK/Tribble can't currently index block-compressed files on the fly. Disable OTF indexing even if the user explicitly asked for it.
return new StandardVCFWriter(file, this.stream, indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
return new StandardVCFWriter(file, this.stream, stub.getMasterSequenceDictionary(), indexOnTheFly && !stub.isCompressed(), stub.doNotWriteGenotypes());
}

View File

@ -25,6 +25,7 @@
package org.broadinstitute.sting.gatk.io.stubs;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.CommandLineExecutable;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@ -150,6 +151,15 @@ public class VCFWriterStub implements Stub<VCFWriter>, VCFWriter {
return isCompressed;
}
/**
* Gets the master sequence dictionary from the engine associated with this stub.
* {@link GenomeAnalysisEngine#getMasterSequenceDictionary}
* @return the master sequence dictionary reported by the associated engine
*/
public SAMSequenceDictionary getMasterSequenceDictionary() {
return engine.getMasterSequenceDictionary();
}
/**
* Should we tell the VCF writer not to write genotypes?
* @return true if the writer should not write genotypes.

View File

@ -35,26 +35,23 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedExtendedEventPileupImpl;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
public class LocusIteratorByState extends LocusIterator {
// private static long discarded_bases = 0L;
// private static long observed_bases = 0L;
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final ArrayList<Sample> samples;
private final ArrayList<String> samples;
private final ReadStateManager readStates;
static private class SAMRecordState {
@ -278,15 +275,27 @@ public class LocusIteratorByState extends LocusIterator {
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// get the list of samples
this.samples = new ArrayList<Sample>(sampleData.getSamples());
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
throw new IllegalArgumentException("samples list must not be empty");
}
}
/**
 * For testing only: builds the placeholder sample list used when the incoming
 * SAMRecords carry no read groups — a list holding a single null "sample".
 */
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
    final ArrayList<String> placeholder = new ArrayList<String>(1);
    placeholder.add(null);
    return placeholder;
}
public Iterator<AlignmentContext> iterator() {
@ -303,19 +312,6 @@ public class LocusIteratorByState extends LocusIterator {
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
}
public void printState() {
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
}
}
private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
}
@ -355,14 +351,14 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
// get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
@ -382,10 +378,7 @@ public class LocusIteratorByState extends LocusIterator {
maxDeletionLength = Math.max(maxDeletionLength,state.getEventLength());
}
else nInsertions++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadEventStartOffset(),
state.getEventLength(),
state.getEventBases()) );
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadEventStartOffset(), state.getEventLength(), state.getEventBases()) );
} else {
// HACK: The readahead mechanism for LocusIteratorByState will effectively read past the current position
@ -407,9 +400,7 @@ public class LocusIteratorByState extends LocusIterator {
// we count such reads (with a longer deletion spanning over a deletion at the previous base we are
// about to report) only if includeReadsWithDeletionAtLoci is true.
size++;
indelPile.add ( new ExtendedEventPileupElement(state.getRead(),
state.getReadOffset()-1,
-1) // length=-1 --> noevent
indelPile.add ( new ExtendedEventPileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()-1, -1) // length=-1 --> noevent
);
}
}
@ -426,10 +417,10 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else {
GenomeLoc location = getLocation();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
@ -447,12 +438,12 @@ public class LocusIteratorByState extends LocusIterator {
continue;
} else {
//observed_bases++;
pile.add(new PileupElement(state.getRead(), state.getReadOffset()));
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), state.getReadOffset()));
size++;
}
} else if ( readInfo.includeReadsWithDeletionAtLoci() && state.getCurrentCigarOperator() != CigarOperator.N ) {
size++;
pile.add(new PileupElement(state.getRead(), -1));
pile.add(new PileupElement((GATKSAMRecord) state.getRead(), -1));
nDeletions++;
}
@ -495,7 +486,7 @@ public class LocusIteratorByState extends LocusIterator {
}
private void updateReadStates() {
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> it = readStates.iterator(sample);
while ( it.hasNext() ) {
SAMRecordState state = it.next();
@ -522,7 +513,7 @@ public class LocusIteratorByState extends LocusIterator {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner;
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
@ -540,9 +531,9 @@ public class LocusIteratorByState extends LocusIterator {
}
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(Sample sample: samples) {
for(final String sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
@ -554,7 +545,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Iterator over the reads associated with that sample.
*/
public Iterator<SAMRecordState> iterator(final Sample sample) {
public Iterator<SAMRecordState> iterator(final String sample) {
return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
@ -590,7 +581,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Total number of reads in the given sample.
*/
public int size(final Sample sample) {
public int size(final String sample) {
return readStatesBySample.get(sample).size();
}
@ -600,12 +591,12 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region.
*/
public int getDownsamplingExtent(final Sample sample) {
public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent();
}
public SAMRecordState getFirst() {
for(Sample sample: samples) {
for(final String sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty())
return reads.peek();
@ -639,8 +630,8 @@ public class LocusIteratorByState extends LocusIterator {
}
samplePartitioner.complete();
for(Sample sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
for(final String sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
@ -1072,6 +1063,3 @@ class SamplePartitioner implements ReadSelector {
}
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.gatk.iterators;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* An iterator which does post-processing of a read, including potentially wrapping
@ -78,7 +77,30 @@ public class ReadFormattingIterator implements StingSAMIterator {
* no next exists.
*/
public SAMRecord next() {
return new GATKSAMRecord(wrappedIterator.next(), useOriginalBaseQualities, defaultBaseQualities);
SAMRecord rec = wrappedIterator.next();
// if we are using default quals, check if we need them, and add if necessary.
// 1. we need if reads are lacking or have incomplete quality scores
// 2. we add if defaultBaseQualities has a positive value
if (defaultBaseQualities >= 0) {
byte reads [] = rec.getReadBases();
byte quals [] = rec.getBaseQualities();
if (quals == null || quals.length < reads.length) {
byte new_quals [] = new byte [reads.length];
for (int i=0; i<reads.length; i++)
new_quals[i] = defaultBaseQualities;
rec.setBaseQualities(new_quals);
}
}
// if we are using original quals, set them now if they are present in the record
if ( useOriginalBaseQualities ) {
byte[] originalQuals = rec.getOriginalBaseQualities();
if ( originalQuals != null )
rec.setBaseQualities(originalQuals);
}
return rec;
}
/**

View File

@ -28,7 +28,6 @@ import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -101,9 +100,6 @@ public class GATKRunReport {
@Element(required = false, name = "exception")
private final ExceptionToXML mException;
@Element(required = false, name = "argument_collection")
private final GATKArgumentCollection mCollection;
@Element(required = true, name = "working_directory")
private String currentPath;
@ -187,7 +183,6 @@ public class GATKRunReport {
cmdLine = engine.createApproximateCommandLineArgumentString(engine, walker);
} catch (Exception ignore) { }
this.mCollection = engine.getArguments();
walkerName = engine.getWalkerName(walker.getClass());
svnVersion = CommandLineGATK.getVersionNumber();
@ -293,15 +288,16 @@ public class GATKRunReport {
* That is, postReport() is guarenteed not to fail for any reason.
*/
private File postReportToLocalDisk(File rootDir) {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
try {
String filename = getID() + ".report.xml.gz";
File file = new File(rootDir, filename);
postReportToFile(file);
logger.debug("Wrote report to " + file);
return file;
} catch ( Exception e ) {
// we catch everything, and no matter what eat the error
exceptDuringRunReport("Couldn't read report file", e);
file.delete();
return null;
}
}
@ -312,6 +308,7 @@ public class GATKRunReport {
File localFile = postReportToLocalDisk(new File("./"));
logger.debug("Generating GATK report to AWS S3 based on local file " + localFile);
if ( localFile != null ) { // we succeeded in creating the local file
localFile.deleteOnExit();
try {
// stop us from printing the annoying, and meaningless, mime types warning
Logger mimeTypeLogger = Logger.getLogger(org.jets3t.service.utils.Mimetypes.class);
@ -336,14 +333,13 @@ public class GATKRunReport {
//logger.info("Uploading " + localFile + " to AWS bucket");
S3Object s3Object = s3Service.putObject(REPORT_BUCKET_NAME, fileObject);
logger.debug("Uploaded to AWS: " + s3Object);
logger.info("Uploaded run statistics report to AWS S3");
} catch ( S3ServiceException e ) {
exceptDuringRunReport("S3 exception occurred", e);
} catch ( NoSuchAlgorithmException e ) {
exceptDuringRunReport("Couldn't calculate MD5", e);
} catch ( IOException e ) {
exceptDuringRunReport("Couldn't read report file", e);
} finally {
localFile.delete();
}
}
}

View File

@ -1,130 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.indexer;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.Tribble;
import org.broad.tribble.index.Index;
import org.broad.tribble.index.IndexFactory;
import org.broad.tribble.util.LittleEndianOutputStream;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec;
import org.broadinstitute.sting.gatk.refdata.tracks.FeatureManager;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import java.io.File;
import java.io.FileOutputStream;
/**
 * a utility class that can create an index, written to a target location. This is useful when you're unable to write to the directory
 * in which an index is located, or if you'd like to pre-index files to save time.
 */
public class RMDIndexer extends CommandLineProgram {
    @Argument(shortName="in", fullName="inputFile", doc="The reference meta data file to index", required = true)
    File inputFileSource = null;

    @Argument(shortName="t", fullName="type", doc="The reference meta data file format (e.g. vcf, bed)", required = true)
    String inputFileType = null;

    @Input(fullName = "referenceSequence", shortName = "R", doc = "The reference to use when indexing; this sequence will be set in the index", required = true)
    public File referenceFile = null;

    @Input(shortName = "i", fullName = "indexFile", doc = "Where to write the index to (as a file), if not supplied we write to <inputFile>.idx", required = false)
    public File indexFile = null;

    @Argument(shortName = "ba", fullName = "balanceApproach", doc="the index balancing approach to take", required=false)
    IndexFactory.IndexBalanceApproach approach = IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME;

    private static Logger logger = Logger.getLogger(RMDIndexer.class);

    // initialized in execute(): the reference sequence and the parser derived from it
    private IndexedFastaSequenceFile ref = null;
    private GenomeLocParser genomeLocParser = null;

    /**
     * Validates the arguments, builds the Tribble index for the input file, and writes it to
     * the requested location (default: inputFile + the standard index extension).
     *
     * @return 0 on success
     * @throws Exception on any failure reading the inputs or writing the index
     */
    @Override
    protected int execute() throws Exception {
        // ---------------------------------------------------------------------------------
        // check the input parameters
        if (referenceFile != null && !referenceFile.canRead())
            throw new IllegalArgumentException("We can't read the reference file: "
                    + referenceFile + ", check that it exists, and that you have permissions to read it");

        // set the index file to the default name if they didn't specify a file
        if (indexFile == null && inputFileSource != null)
            indexFile = new File(inputFileSource.getAbsolutePath() + Tribble.STANDARD_INDEX_EXTENSION);

        // check that we can create the output file; we refuse to overwrite an existing index
        if (indexFile == null || indexFile.exists())
            throw new IllegalArgumentException("We can't write to the index file location: "
                    + indexFile + ", the index exists");

        logger.info(String.format("attempting to index file: %s", inputFileSource));
        logger.info(String.format("using reference: %s", ((referenceFile != null) ? referenceFile.getAbsolutePath() : "(not supplied)")));
        logger.info(String.format("using type: %s", inputFileType));
        logger.info(String.format("writing to location: %s", indexFile.getAbsolutePath()));

        // try to index the file
        // ---------------------------------------------------------------------------------
        // setup the reference and a parser built from its dictionary
        ref = new CachingIndexedFastaSequenceFile(referenceFile);
        genomeLocParser = new GenomeLocParser(ref);

        // get a track builder
        RMDTrackBuilder builder = new RMDTrackBuilder(ref.getSequenceDictionary(),genomeLocParser, ValidationExclusion.TYPE.ALL);

        // find the types available to the track builders and check that the requested type is valid
        FeatureManager.FeatureDescriptor descriptor = builder.getFeatureManager().getByName(inputFileType);
        if (descriptor == null)
            throw new IllegalArgumentException("The type specified " + inputFileType + " is not a valid type. Valid type list: " + builder.getFeatureManager().userFriendlyListOfAvailableFeatures());

        // create the codec; reference-dependent codecs need the parser injected before use
        FeatureCodec codec = builder.getFeatureManager().createCodec(descriptor, "foo", genomeLocParser);
        if (codec instanceof ReferenceDependentFeatureCodec)
            ((ReferenceDependentFeatureCodec)codec).setGenomeLocParser(genomeLocParser);

        // get some timing info for the summary message below
        long currentTime = System.currentTimeMillis();

        Index index = IndexFactory.createIndex(inputFileSource, codec, approach);

        // add writing of the sequence dictionary, if supplied
        builder.setIndexSequenceDictionary(inputFileSource, index, ref.getSequenceDictionary(), indexFile, false);

        // create the output stream, and write the index; close in a finally block so the
        // file handle is not leaked if index.write() throws
        LittleEndianOutputStream stream = new LittleEndianOutputStream(new FileOutputStream(indexFile));
        try {
            index.write(stream);
        } finally {
            stream.close();
        }

        // report and exit
        logger.info("Successfully wrote the index to location: " + indexFile + " in " + ((System.currentTimeMillis() - currentTime)/1000) + " seconds");
        return 0; // return successfully
    }

    /**
     * the generic call execute main
     * @param argv the arguments from the command line
     */
    public static void main(String[] argv) {
        try {
            RMDIndexer instance = new RMDIndexer();
            start(instance, argv);
            System.exit(CommandLineProgram.result);
        } catch (Exception e) {
            exitSystemWithError(e);
        }
    }
}

View File

@ -0,0 +1,106 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.index.Index;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
 * Utilities for working with sequence dictionaries embedded in Tribble indices.
 *
 * The dictionary is round-tripped through the index's property list: each contig is
 * stored as a property keyed by "DICT:" + contig name, with the contig length as its value.
 */
public class IndexDictionaryUtils {
    private final static Logger logger = Logger.getLogger(IndexDictionaryUtils.class);

    // a constant we use for marking sequence dictionary entries in the Tribble index property list
    public static final String SequenceDictionaryPropertyPredicate = "DICT:";

    /**
     * get the sequence dictionary from the track's index properties, if available
     * @param index the index file to use
     * @return a SAMSequenceDictionary built from the "DICT:" properties; empty (never null)
     *         when the index carries no dictionary entries
     */
    public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
        SAMSequenceDictionary dict = new SAMSequenceDictionary();
        for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
            if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
                // strip the "DICT:" prefix to recover the contig name; the property value is the length
                dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length()),
                                                       Integer.valueOf(entry.getValue())));
        }
        return dict;
    }

    /**
     * create the sequence dictionary with the contig list; a backup approach.
     * Contig lengths are unknown here, so each sequence is recorded with length 0.
     * @param index the index file to use
     * @param dict the sequence dictionary to add contigs to
     * @return the filled-in sequence dictionary
     */
    static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
        LinkedHashSet<String> seqNames = index.getSequenceNames();
        if (seqNames == null) {
            return dict;
        }
        for (String name : seqNames) {
            SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
            dict.addSequence(seq);
        }
        return dict;
    }

    /**
     * Stores the given sequence dictionary into the index's property list, one
     * "DICT:"-prefixed property per contig (value = contig length).
     * @param index the index to annotate
     * @param dict the dictionary whose sequences should be recorded
     */
    public static void setIndexSequenceDictionary(Index index, SAMSequenceDictionary dict) {
        for ( SAMSequenceRecord seq : dict.getSequences() ) {
            final String contig = IndexDictionaryUtils.SequenceDictionaryPropertyPredicate + seq.getSequenceName();
            final String length = String.valueOf(seq.getSequenceLength());
            index.addProperty(contig,length);
        }
    }

    /**
     * Validates a track's embedded sequence dictionary against the reference dictionary.
     * Tracks without a built-in dictionary (null or empty) are logged and skipped.
     * @param trackName name of the track, used in log/error messages
     * @param trackDict the track's dictionary; may be null or empty
     * @param referenceDict the reference dictionary to validate against
     * @param validationExclusionType exclusions that may relax the validation
     */
    public static void validateTrackSequenceDictionary(final String trackName,
                                                       final SAMSequenceDictionary trackDict,
                                                       final SAMSequenceDictionary referenceDict,
                                                       final ValidationExclusion.TYPE validationExclusionType ) {
        // if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
        if (trackDict == null || trackDict.size() == 0)
            logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
        else {
            SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
        }
    }
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.gatk.refdata.tracks;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.FeatureSource;
@ -41,7 +40,6 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet.RMDStorageType;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -52,16 +50,11 @@ import org.broadinstitute.sting.utils.instrumentation.Sizeof;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.LinkedHashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;
/**
*
* @author aaron
*
* @author aaron
* `
* Class RMDTrackBuilder
*
@ -76,9 +69,6 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
private final static Logger logger = Logger.getLogger(RMDTrackBuilder.class);
public final static boolean MEASURE_TRIBBLE_QUERY_PERFORMANCE = false;
// a constant we use for marking sequence dictionary entries in the Tribble index property list
public static final String SequenceDictionaryPropertyPredicate = "DICT:";
// private sequence dictionary we use to set our tracks with
private SAMSequenceDictionary dict = null;
@ -150,7 +140,7 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
final FeatureManager.FeatureDescriptor descriptor = getFeatureManager().getByCodec(codecClass);
if (descriptor == null)
throw new ReviewedStingException("Unable to find type name for codex class " + codecClass.getName());
throw new ReviewedStingException("Unable to find type name for codec class " + codecClass.getName());
return createInstanceOfTrack(new RMDTriplet("anonymous",descriptor.getName(),inputFile.getAbsolutePath(),RMDStorageType.FILE,new Tags()));
}
@ -210,13 +200,19 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
try { logger.info(String.format(" Index for %s has size in bytes %d", inputFile, Sizeof.getObjectGraphSize(index))); }
catch (ReviewedStingException e) { }
sequenceDictionary = getSequenceDictionaryFromProperties(index);
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
// if we don't have a dictionary in the Tribble file, and we've set a dictionary for this builder, set it in the file if they match
if (sequenceDictionary.size() == 0 && dict != null) {
File indexFile = Tribble.indexFile(inputFile);
setIndexSequenceDictionary(inputFile,index,dict,indexFile,true);
sequenceDictionary = getSequenceDictionaryFromProperties(index);
validateAndUpdateIndexSequenceDictionary(inputFile, index, dict);
try { // re-write the index
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
sequenceDictionary = IndexDictionaryUtils.getSequenceDictionaryFromProperties(index);
}
if ( MEASURE_TRIBBLE_QUERY_PERFORMANCE )
@ -363,88 +359,31 @@ public class RMDTrackBuilder { // extends PluginManager<FeatureCodec> {
// this can take a while, let them know what we're doing
logger.info("Creating Tribble index in memory for file " + inputFile);
Index idx = IndexFactory.createIndex(inputFile, codec, IndexFactory.IndexBalanceApproach.FOR_SEEK_TIME);
setIndexSequenceDictionary(inputFile, idx, dict, null, false);
validateAndUpdateIndexSequenceDictionary(inputFile, idx, dict);
return idx;
}
// ---------------------------------------------------------------------------------------------------------
// static functions to work with the sequence dictionaries of indexes
// ---------------------------------------------------------------------------------------------------------
/**
* get the sequence dictionary from the track, if available. If not, make it from the contig list that is always in the index
* @param index the index file to use
* @return a SAMSequenceDictionary if available, null if unavailable
*/
public static SAMSequenceDictionary getSequenceDictionaryFromProperties(Index index) {
SAMSequenceDictionary dict = new SAMSequenceDictionary();
for (Map.Entry<String,String> entry : index.getProperties().entrySet()) {
if (entry.getKey().startsWith(SequenceDictionaryPropertyPredicate))
dict.addSequence(new SAMSequenceRecord(entry.getKey().substring(SequenceDictionaryPropertyPredicate.length() , entry.getKey().length()),
Integer.valueOf(entry.getValue())));
}
return dict;
}
/**
* create the sequence dictionary with the contig list; a backup approach
* @param index the index file to use
* @param dict the sequence dictionary to add contigs to
* @return the filled-in sequence dictionary
*/
private static SAMSequenceDictionary createSequenceDictionaryFromContigList(Index index, SAMSequenceDictionary dict) {
LinkedHashSet<String> seqNames = index.getSequenceNames();
if (seqNames == null) {
return dict;
}
for (String name : seqNames) {
SAMSequenceRecord seq = new SAMSequenceRecord(name, 0);
dict.addSequence(seq);
}
return dict;
}
/**
* set the sequence dictionary of the track. This function checks that the contig listing of the underlying file is compatible.
* (that each contig in the index is in the sequence dictionary).
* @param inputFile for proper error message formatting.
* @param dict the sequence dictionary
* @param index the index file
* @param indexFile the index file
* @param rewriteIndex should we rewrite the index when we're done?
*
*/
public void setIndexSequenceDictionary(File inputFile, Index index, SAMSequenceDictionary dict, File indexFile, boolean rewriteIndex) {
if (dict == null) return;
SAMSequenceDictionary currentDict = createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(),currentDict,dict);
public void validateAndUpdateIndexSequenceDictionary(final File inputFile, final Index index, final SAMSequenceDictionary dict) {
if (dict == null) throw new ReviewedStingException("BUG: dict cannot be null");
// check that every contig in the RMD contig list is at least in the sequence dictionary we're being asked to set
for (SAMSequenceRecord seq : currentDict.getSequences()) {
if (dict.getSequence(seq.getSequenceName()) == null)
continue;
index.addProperty(SequenceDictionaryPropertyPredicate + dict.getSequence(seq.getSequenceName()).getSequenceName(), String.valueOf(dict.getSequence(seq.getSequenceName()).getSequenceLength()));
}
// re-write the index
if (rewriteIndex) try {
writeIndexToDisk(index,indexFile,new FSLockWithShared(indexFile));
} catch (IOException e) {
logger.warn("Unable to update index with the sequence dictionary for file " + indexFile + "; this will not effect your run of the GATK");
}
final SAMSequenceDictionary currentDict = IndexDictionaryUtils.createSequenceDictionaryFromContigList(index, new SAMSequenceDictionary());
validateTrackSequenceDictionary(inputFile.getAbsolutePath(), currentDict, dict);
// actually update the dictionary in the index
IndexDictionaryUtils.setIndexSequenceDictionary(index, dict);
}
public void validateTrackSequenceDictionary(String trackName, SAMSequenceDictionary trackDict, SAMSequenceDictionary referenceDict) {
// if the sequence dictionary is empty (as well as null which means it doesn't have a dictionary), skip validation
if (trackDict == null || trackDict.size() == 0)
logger.info("Track " + trackName + " doesn't have a sequence dictionary built in, skipping dictionary validation");
else {
Set<String> trackSequences = new TreeSet<String>();
for (SAMSequenceRecord dictionaryEntry : trackDict.getSequences())
trackSequences.add(dictionaryEntry.getSequenceName());
SequenceDictionaryUtils.validateDictionaries(logger, validationExclusionType, trackName, trackDict, "reference", referenceDict);
}
public void validateTrackSequenceDictionary(final String trackName,
final SAMSequenceDictionary trackDict,
final SAMSequenceDictionary referenceDict ) {
IndexDictionaryUtils.validateTrackSequenceDictionary(trackName, trackDict, referenceDict, validationExclusionType);
}
}

View File

@ -1,57 +0,0 @@
package org.broadinstitute.sting.gatk.refdata.utils;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.*;
/**
*
* @author aaron
*
* Class RMDIntervalGenerator
*
* Creates an interval list, given an RMDTrack
*/
/**
 * Derives genome interval lists from a reference-ordered data (ROD) track.
 */
public class RMDIntervalGenerator {
    public ReferenceOrderedDataSource dataSource;

    /**
     * Wraps a ROD track so its records can be turned into intervals.
     * @param dataSource the track to generate intervals from; must not be null
     */
    public RMDIntervalGenerator(ReferenceOrderedDataSource dataSource) {
        if (dataSource == null) throw new IllegalArgumentException("Data source cannot be null");
        this.dataSource = dataSource;
    }

    /**
     * Walks the entire track and collects the location of every record.
     * @return the genome locations of all records that have one
     */
    public List<GenomeLoc> toGenomeLocList() {
        final List<GenomeLoc> result = new ArrayList<GenomeLoc>();
        for (Iterator<RODRecordList> records = dataSource.seek((GenomeLoc)null); records.hasNext(); ) {
            final GenomeLoc location = records.next().getLocation();
            if (location != null)
                result.add(location);
        }
        return result;
    }

    /**
     * Indexes the given reference-ordered data sources by their track name.
     * @param sources the reference ordered data sources to index
     * @return a map from track name to its data source
     */
    public static Map<String,ReferenceOrderedDataSource> getRMDTrackNames(List<ReferenceOrderedDataSource> sources) {
        final Map<String,ReferenceOrderedDataSource> byName = new HashMap<String,ReferenceOrderedDataSource>();
        for (final ReferenceOrderedDataSource source : sources)
            byName.put(source.getName(), source);
        return byName;
    }
}

View File

@ -24,12 +24,14 @@
package org.broadinstitute.sting.gatk.report;
import org.broadinstitute.sting.utils.collections.Pair;
import java.util.*;
/**
* Tracks a linked list of GATKReportColumn in order by name.
*/
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> implements Iterable<GATKReportColumn> {
private List<String> columnNames = new ArrayList<String>();
/**
@ -52,4 +54,14 @@ public class GATKReportColumns extends LinkedHashMap<String, GATKReportColumn> {
columnNames.add(key);
return super.put(key, value);
}
/**
 * Iterates over the columns in insertion order (the order recorded in columnNames).
 * The returned iterator is read-only: remove() is not supported.
 */
@Override
public Iterator<GATKReportColumn> iterator() {
    return new Iterator<GATKReportColumn>() {
        private int offset = 0;

        @Override
        public boolean hasNext() { return offset < columnNames.size(); }

        @Override
        public GATKReportColumn next() {
            // fix: honor the Iterator contract — fail explicitly when exhausted
            if (!hasNext()) throw new NoSuchElementException();
            return getByIndex(offset++);
        }

        @Override
        public void remove() { throw new UnsupportedOperationException("Cannot remove from a GATKReportColumn iterator"); }
    };
}
}

View File

@ -286,6 +286,10 @@ public class GATKReportTable {
}
}
/**
 * Returns true if the table has a row for the given primary key.
 * @param primaryKey the primary key to look up in the primary key column
 */
public boolean containsKey(Object primaryKey) {
return primaryKeyColumn.contains(primaryKey);
}
/**
* Set the value for a given position in the table
*

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* Categorical sample trait for association and analysis
*
* Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having an
* other trait value (stored in an associated value in the Sample class)
*
* @author Mark DePristo
* @since Sept. 2011
*/
public enum Affection {
/** Affection status has not been determined. */
UNKNOWN,
/** Sample is affected by the categorical trait (suffers from the disease). */
AFFECTED,
/** Sample is not affected by the categorical trait. */
UNAFFECTED,
/** Sample has an "other" trait: the actual value is an arbitrary string stored as an associated value in the Sample class. */
OTHER
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* ENUM of possible human genders: male, female, or unknown
*/
public enum Gender {
/** Male individual. */
MALE,
/** Female individual. */
FEMALE,
/** Gender was not specified or could not be determined. */
UNKNOWN
}

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.*;
/**
* Reads PED file-formatted tabular text files
*
* See http://www.broadinstitute.org/mpg/tagger/faq.html
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
*
* The "ped" file format refers to the widely-used format for linkage pedigree data.
* Each line describes a single (diploid) individual in the following format:
*
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
*
* If your data lacks pedigree information (for example, unrelated case/control individuals),
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
* Check out the information at the PLINK website on the "ped" file format.
*
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
* Family ID
* Individual ID
* Paternal ID
* Maternal ID
* Sex (1=male; 2=female; other=unknown)
* Phenotype
*
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
* and are actually representing these values as strings instead of doubles
*
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394
*
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
* and an error message will arise if an analysis that uses family information is requested and an
* individual of 'unknown' sex is specified as a father or mother.
*
*
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.
*
* Affection status, by default, should be coded:
* -9 missing
* 0 missing
* 1 unaffected
* 2 affected
*
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
* plink --file mydata --1 which will specify a disease phenotype coded:
*
* -9 missing
* 0 unaffected
* 1 affected
*
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
*
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
*
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
*
* FAM001 1 0 0 1 2 A A G G A C
* FAM001 2 0 0 1 2 A A A G 0 0
* ...
*
* Note that the GATK does not support genotypes in a PED file.
*
* @author Mark DePristo
* @since 2011
*/
public class PedReader {
    private static Logger logger = Logger.getLogger(PedReader.class);

    /** Phenotype values recognized as categorical affection codes; any other value marks the column as quantitative ("other"). */
    final static private Set<String> CATEGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
    /** Lines starting with this marker are comments and are ignored. */
    final static private String commentMarker = "#";

    /**
     * An enum that specifies which, if any, of the standard PED fields are
     * missing from the input records. For example, suppose we have the full record:
     *
     *     "fam1 kid dad mom 1 2"
     *
     * indicating a male affected child. This can be parsed with the -ped x.ped argument
     * to the GATK. Suppose we only have:
     *
     *     "fam1 kid 1"
     *
     * we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
     */
    public enum MissingPedField {
        /**
         * The PED records do not have the first (FAMILY_ID) argument. The family id
         * will be set to null / empty.
         */
        NO_FAMILY_ID,

        /**
         * The PED records do not have either the paternal or maternal IDs, so
         * the corresponding IDs are set to null.
         */
        NO_PARENTS,

        /**
         * The PED records do not have the GENDER field, so the sex of each
         * sample will be set to UNKNOWN.
         */
        NO_SEX,

        /**
         * The PED records do not have the PHENOTYPE field, so the phenotype
         * of each sample will be set to UNKNOWN.
         */
        NO_PHENOTYPE
    }

    /** The six standard PED columns, in their canonical order. */
    protected enum Field {
        FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
    }

    // phenotype codes
    private final static String MISSING_VALUE1 = "-9";
    private final static String MISSING_VALUE2 = "0";
    private final static String PHENOTYPE_UNAFFECTED = "1";
    private final static String PHENOTYPE_AFFECTED = "2";

    // sex codes (any other value means unknown)
    private final static String SEX_MALE = "1";
    private final static String SEX_FEMALE = "2";

    public PedReader() { }

    /**
     * Parses samples from a PED file on disk.
     *
     * NOTE(review): the FileReader is never explicitly closed here — presumably
     * XReadLines consumes and releases it; confirm it closes the underlying stream.
     *
     * @param source        the PED file to read
     * @param missingFields which standard PED columns are absent from the records
     * @param sampleDB      the database each parsed sample is registered in
     * @return the samples parsed from the file
     * @throws FileNotFoundException if the file cannot be opened
     */
    public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
        logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
        return parse(new FileReader(source), missingFields, sampleDB);
    }

    /**
     * Parses samples from a PED-formatted string; ';' characters are treated as record separators.
     *
     * @param source        PED records, ';'-separated
     * @param missingFields which standard PED columns are absent from the records
     * @param sampleDB      the database each parsed sample is registered in
     * @return the samples parsed from the string
     */
    public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
        return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
    }

    /**
     * Core PED parser. Makes two passes over the records: the first determines whether
     * the phenotype column is quantitative (any non-categorical value anywhere makes it so),
     * the second builds one Sample per record. Parents referenced by ID but not listed as
     * records of their own are added as implicit samples at the end.
     *
     * @param reader        source of PED lines
     * @param missingFields which standard PED columns are absent from the records
     * @param sampleDB      the database each parsed sample is registered in
     * @return the samples parsed (including any implicitly created parents)
     * @throws UserException.MalformedFile if a record has the wrong number of fields
     */
    public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        final List<String> lines = new XReadLines(reader).readLines();

        // Column offsets implied by the missing-field set; -1 means "column not present".
        final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
        final int samplePos = familyPos + 1;
        final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
        final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
        final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
        final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
        final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;

        // Pass 1: split each record and decide whether the phenotype column is quantitative.
        int lineNo = 1;
        boolean isQT = false;
        final List<String[]> splits = new ArrayList<String[]>(lines.size());
        for ( final String line : lines ) {
            if ( line.startsWith(commentMarker)) continue;
            if ( line.trim().equals("") ) continue;

            final String[] parts = line.split("\\s+");

            if ( parts.length != nExpectedFields )
                throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

            if ( phenotypePos != -1 ) {
                // a single non-categorical value anywhere flips the whole column to quantitative
                isQT = isQT || ! CATEGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
            }

            splits.add(parts);
            lineNo++;
        }
        logger.info("Phenotype is other? " + isQT);

        // Pass 2: build one Sample per record.
        lineNo = 1;
        final List<Sample> samples = new ArrayList<Sample>(splits.size());
        for ( final String[] parts : splits ) {
            String familyID = null, individualID, paternalID = null, maternalID = null;
            Gender sex = Gender.UNKNOWN;
            String quantitativePhenotype = Sample.UNSET_QT;
            Affection affection = Affection.UNKNOWN;

            if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
            individualID = parts[samplePos];
            if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
            if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);

            if ( sexPos != -1 ) {
                if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
                else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
                else sex = Gender.UNKNOWN;
            }

            if ( phenotypePos != -1 ) {
                if ( isQT ) {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) )
                        affection = Affection.UNKNOWN;
                    else {
                        affection = Affection.OTHER;
                        quantitativePhenotype = parts[phenotypePos];
                    }
                } else {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
                    else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
                }
            }

            final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
            samples.add(s);
            sampleDB.addSample(s);
            lineNo++;
        }

        // Create implicit samples for parents referenced by pedigree links but not listed themselves.
        for ( final Sample sample : new ArrayList<Sample>(samples) ) {
            Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
            if ( dad != null ) samples.add(dad);

            Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
            if ( mom != null ) samples.add(mom);
        }

        return samples;
    }

    /** Maps the PED missing-value codes ("-9" and "0") to null; any other value is returned unchanged. */
    private final static String maybeMissing(final String string) {
        if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
            return null;
        else
            return string;
    }

    /**
     * Adds a parent sample of the given gender to the DB when it is referenced by ID but not yet present.
     * @return the newly created sample, or null if nothing needed to be added
     */
    private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
        if ( id != null && sampleDB.getSample(id) == null ) {
            Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
            sampleDB.addSample(s);
            return s;
        } else
            return null;
    }

    /**
     * Parses a list of tags from the command line, assuming it comes from the GATK Engine
     * tags, and returns the corresponding EnumSet.
     *
     * @param arg  the actual engine arg, used for the UserException if there's an error
     * @param tags a list of string tags that should be converted to MissingPedField values
     * @return the set of missing fields named by the tags
     * @throws UserException.BadArgumentValue if a tag is not a valid MissingPedField name
     */
    public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
        final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);

        for ( final String tag : tags ) {
            try {
                missingFields.add(MissingPedField.valueOf(tag));
            } catch ( IllegalArgumentException e ) {
                // fix: Arrays.toString renders the enum values; the bare array previously printed as an object reference
                throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + Arrays.toString(MissingPedField.values()));
            }
        }

        return missingFields;
    }
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
*
*/
public enum PedigreeValidationType {
/**
 * Require that, when a pedigree file is provided, every sample in the VCF or BAM files
 * has a corresponding entry in the pedigree file(s).
 */
STRICT,
/**
 * Do not enforce any overlap between the VCF/BAM samples and the pedigree data.
 */
SILENT
}

View File

@ -0,0 +1,222 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
/**
*
*/
public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
final private String familyID, paternalID, maternalID;
final private Gender gender;
final private String otherPhenotype;
final private Affection affection;
final private String ID;
final private SampleDB infoDB;
final private Map<String, Object> properties = new HashMap<String, Object>();
public final static String UNSET_QT = null;
public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final String otherPhenotype) {
this.familyID = familyID;
this.paternalID = paternalID;
this.maternalID = maternalID;
this.gender = gender;
this.otherPhenotype = otherPhenotype;
this.affection = affection;
this.ID = ID;
this.infoDB = infoDB;
}
protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection, final String otherPhenotype) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
}
protected Sample(final String ID,
final String familyID, final String paternalID, final String maternalID,
final Gender gender, final Affection affection) {
this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
}
public Sample(final String ID, final SampleDB infoDB,
final String familyID, final String paternalID, final String maternalID, final Gender gender) {
this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
}
public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
}
public Sample(String id, SampleDB infoDB) {
this(id, infoDB, null, null, null,
Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
}
// -------------------------------------------------------------------------------------
//
// standard property getters
//
// -------------------------------------------------------------------------------------
public String getID() {
return ID;
}
public String getFamilyID() {
return familyID;
}
public String getPaternalID() {
return paternalID;
}
public String getMaternalID() {
return maternalID;
}
public Affection getAffection() {
return affection;
}
public boolean hasOtherPhenotype() {
return affection == Affection.OTHER;
}
public String getOtherPhenotype() {
return otherPhenotype;
}
/**
* Get the sample's mother
* @return sample object with relationship mother, if exists, or null
*/
public Sample getMother() {
return infoDB.getSample(maternalID);
}
/**
* Get the sample's father
* @return sample object with relationship father, if exists, or null
*/
public Sample getFather() {
return infoDB.getSample(paternalID);
}
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender
*/
public Gender getGender() {
return gender;
}
@Override
public int compareTo(final Sample sample) {
return ID.compareTo(sample.getID());
}
@Override
public String toString() {
return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
getOtherPhenotype(), properties);
}
// // -------------------------------------------------------------------------------------
// //
// // code for working with additional -- none standard -- properites
// //
// // -------------------------------------------------------------------------------------
//
// public Map<String, Object> getExtraProperties() {
// return Collections.unmodifiableMap(properties);
// }
//
// /**
// * Get one property
// * @param key key of property
// * @return value of property as generic object
// */
// public Object getExtraPropertyValue(final String key) {
// return properties.get(key);
// }
//
// /**
// *
// * @param key property key
// * @return true if sample has this property (even if its value is null)
// */
// public boolean hasExtraProperty(String key) {
// return properties.containsKey(key);
// }
// Hash is keyed on the sample ID alone; consistent with equals(), which requires
// ID equality first, so equal samples always share a hash code.
@Override
public int hashCode() {
return ID.hashCode();
}
/**
 * Two samples are equal when the ID and every pedigree/phenotype attribute
 * (family, parents, gender, phenotype, affection, extra properties) match,
 * with null fields comparing equal only to null.
 */
@Override
public boolean equals(final Object o) {
    if (!(o instanceof Sample))
        return false;   // covers null as well: null is never an instance of Sample

    final Sample that = (Sample) o;
    return ID.equals(that.ID)
            && equalOrNull(familyID, that.familyID)
            && equalOrNull(paternalID, that.paternalID)
            && equalOrNull(maternalID, that.maternalID)
            && equalOrNull(gender, that.gender)
            && equalOrNull(otherPhenotype, that.otherPhenotype)
            && equalOrNull(affection, that.affection)
            && equalOrNull(properties, that.properties);
}
/**
 * Null-tolerant equality check: true when both arguments are null, or when
 * both are non-null and o1.equals(o2).
 */
private final static boolean equalOrNull(final Object o1, final Object o2) {
    return o1 == null ? o2 == null : (o2 != null && o1.equals(o2));
}
/**
 * Merge two values of a single pedigree field, treating null and emptyValue
 * as "no information".
 *
 * @param name       sample ID, used only in the error message
 * @param field      field name, used only in the error message
 * @param o1         previously recorded value (may be null)
 * @param o2         newly read value (may be null)
 * @param emptyValue the sentinel meaning "unset" for this field (e.g.
 *                   Gender.UNKNOWN), or null if the field has no sentinel
 * @return the merged value; null/emptyValue only when neither side carries
 *         real information
 * @throws UserException if both sides carry real but different values
 */
private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
    if ( o1 == null || o1.equals(emptyValue) ) {
        // o1 carries no information: fall back to o2 (which may itself be
        // null or the empty sentinel)
        return o2;
    } else if ( o2 == null || o2.equals(emptyValue) ) {
        // only o1 carries real information: keep it
        return o1;
    } else if ( o1.equals(o2) ) {
        // both carry the same information.  Content equality (equals) is
        // required here, not reference equality: equal-but-distinct objects,
        // such as identical Strings parsed from two pedigree files, must
        // merge cleanly rather than trigger the inconsistency error below.
        return o1;
    } else {
        throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
    }
}
/**
 * Merge two Sample records describing the same individual, field by field.
 * If the records are already equal the newer one is returned unchanged;
 * otherwise each pedigree field is merged via mergeValues, which throws a
 * UserException on genuinely conflicting values.
 *
 * @param prev the sample already in the DB
 * @param next the newly parsed sample with the same ID
 * @return a merged Sample bound to prev's DB
 */
public final static Sample mergeSamples(final Sample prev, final Sample next) {
    if ( prev.equals(next) )
        return next;

    return new Sample(prev.getID(), prev.infoDB,
            mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
            mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
            // error-label typo fixed: was "Material_ID"
            mergeValues(prev.getID(), "Maternal_ID", prev.getMaternalID(), next.getMaternalID(), null),
            mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
            mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
            mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
    //mergeValues(prev.getID(), "ExtraProperties", prev.getExtraProperties(), next.getExtraProperties(), Collections.emptyMap()));
}
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
/**
 * An in-memory database of Sample records keyed by unique sample ID, with
 * convenience lookups by read, read group, genotype, and pedigree (family)
 * structure.
 */
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    public SampleDB() {
    }

    /**
     * Add a single sample to the database.  If a sample with the same ID is
     * already present, the two records are merged (Sample.mergeSamples), so
     * repeated additions accumulate pedigree information.
     *
     * @param sample to be added
     * @return this, for chaining
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID.
     *
     * @param id sample ID to look up
     * @return sample with this ID, or null if none exists
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     * Look up the sample associated with a read's read group.
     *
     * @param read the read whose read group names the sample
     * @return sample for the read's read group, or null if the read has no
     *         read group or the sample is not in the DB
     */
    public Sample getSample(final SAMRecord read) {
        // guard against reads without a read group, which previously caused
        // an NPE inside getSample(SAMReadGroupRecord)
        final SAMReadGroupRecord rg = read.getReadGroup();
        return rg == null ? null : getSample(rg);
    }

    /**
     * @param rg read group whose sample name is used as the key
     * @return sample with this ID, or null if none exists
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * @param g Genotype whose sample name is used as the key
     * @return sample with this ID, or null if none exists
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects.
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /**
     * @return a fresh, mutable set containing every sample in the DB
     */
    public Set<Sample> getSamples() {
        return new HashSet<Sample>(samples.values());
    }

    /**
     * @return an unmodifiable view of all sample IDs in the DB
     */
    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names there
     * will not be any duplicates in the returned set.
     * NOTE(review): names not present in the DB contribute a null element to
     * the returned set (preserving the historical behavior) -- confirm that
     * callers expect this rather than an exception.
     *
     * @param sampleNameList collection of sample names
     * @return corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        final HashSet<Sample> result = new HashSet<Sample>();
        for (final String name : sampleNameList) {
            // plain HashMap lookups cannot throw, so the old defensive
            // try/catch around this call was dead code and has been removed
            result.add(getSample(name));
        }
        return result;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids).
     *
     * @return sorted set of family IDs
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids.  Both the map and the member sets are sorted
     * (TreeMap / TreeSet, relying on Sample.compareTo ordering by ID).
     *
     * @return sorted map of family ID to family members
     */
    public final Map<String, Set<Sample>> getFamilies() {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();
        for ( final Sample sample : samples.values() ) {
            final String famID = sample.getFamilyID();
            if ( famID != null ) {
                if ( ! families.containsKey(famID) )
                    families.put(famID, new TreeSet<Sample>());
                families.get(famID).add(sample);
            }
        }
        return families;
    }

    /**
     * Return all samples with a given family ID.
     * Note: recomputes the full family map on every call, so this is O(#samples).
     *
     * @param familyId family to fetch
     * @return members of the family, or null if the family ID is unknown
     */
    public Set<Sample> getFamily(String familyId) {
        return getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample.
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     *
     * @param sample the putative parent
     * @return family members whose mother or father is this sample
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        for ( final Sample familyMember : getFamily(sample.getFamilyID())) {
            // identity comparison is safe here: the DB stores one canonical
            // Sample instance per ID, and getMother()/getFather() resolve
            // through this same DB
            if ( familyMember.getMother() == sample || familyMember.getFather() == sample ) {
                children.add(familyMember);
            }
        }
        return children;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
 * Builder that assembles a SampleDB from SAM headers, explicit sample name
 * lists, and pedigree (PED) files or inline pedigree strings, then validates
 * the combined result according to the requested strictness.
 */
public class SampleDBBuilder {
    PedigreeValidationType validationStrictness;
    final SampleDB sampleDB = new SampleDB();

    final GenomeAnalysisEngine engine;

    Set<Sample> samplesFromDataSources = new HashSet<Sample>();
    Set<Sample> samplesFromPedigrees = new HashSet<Sample>();

    /** for testing only */
    protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
        engine = null;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     */
    public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
        this.engine = engine;
        this.validationStrictness = validationStrictness;
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them
     */
    public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
        addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
        return this;
    }

    /**
     * Registers a bare Sample for each name not already present in the DB,
     * remembering which samples came from data sources (for validation).
     */
    public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
        for (final String name : sampleNames) {
            if (sampleDB.getSample(name) != null)
                continue; // already known; nothing to hallucinate

            final Sample fresh = new Sample(name, sampleDB);
            sampleDB.addSample(fresh);
            samplesFromDataSources.add(fresh); // keep track of data source samples
        }
        return this;
    }

    /**
     * Parses each pedigree file into the DB, remembering which samples came
     * from pedigrees (for validation).
     */
    public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
        for (final File file : pedigreeFiles) {
            samplesFromPedigrees.addAll(addSamplesFromPedigreeArgument(file));
        }
        return this;
    }

    /**
     * Parses each inline pedigree string into the DB, remembering which
     * samples came from pedigrees (for validation).
     */
    public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
        for (final String ped : pedigreeStrings) {
            samplesFromPedigrees.addAll(addSamplesFromPedigreeArgument(ped));
        }
        return this;
    }

    /**
     * Parse one sample file and integrate it with samples that are already there
     * Fail quickly if we find any errors in the file
     */
    private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
        try {
            return new PedReader().parse(sampleFile, getMissingFields(sampleFile), sampleDB);
        } catch ( FileNotFoundException e ) {
            throw new UserException.CouldNotReadInputFile(sampleFile, e);
        }
    }

    // string-argument variant of the above; inline pedigrees cannot raise
    // FileNotFoundException so no translation is needed
    private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
        return new PedReader().parse(string, getMissingFields(string), sampleDB);
    }

    /**
     * Validates and returns the finished database.
     */
    public SampleDB getFinalSampleDB() {
        validate();
        return sampleDB;
    }

    /**
     * Determines which PED columns were declared missing via positional tags
     * on the command-line argument; without an engine, assumes none.
     */
    public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
        if ( engine == null )
            return EnumSet.noneOf(PedReader.MissingPedField.class);

        final List<String> tags = engine.getTags(engineArg).getPositionalTags();
        return PedReader.parseMissingFieldTags(engineArg, tags);
    }

    // --------------------------------------------------------------------------------
    //
    // Validation
    //
    // --------------------------------------------------------------------------------

    protected final void validate() {
        if ( validationStrictness == PedigreeValidationType.SILENT )
            return;

        // cross-checking is only meaningful when both pedigree and
        // data-source samples were provided
        if ( samplesFromPedigrees.isEmpty() || samplesFromDataSources.isEmpty() )
            return;

        final Set<String> pedigreeNames = new HashSet<String>();
        for ( final Sample pedSample : samplesFromPedigrees )
            pedigreeNames.add(pedSample.getID());

        // every data-source sample must be annotated in some pedigree
        for ( final Sample dsSample : samplesFromDataSources ) {
            if ( ! pedigreeNames.contains(dsSample.getID()) )
                throw new UserException("Sample " + dsSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
        }
    }
}

View File

@ -364,8 +364,8 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
// count up the number of skipped reads by summing over all filters
long nSkippedReads = 0L;
for ( Map.Entry<Class, Long> countsByFilter: cumulativeMetrics.getCountsByFilter().entrySet())
nSkippedReads += countsByFilter.getValue();
for ( final long countsByFilter : cumulativeMetrics.getCountsByFilter().values())
nSkippedReads += countsByFilter;
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours", elapsed, elapsed / 60, elapsed / 3600));
if ( cumulativeMetrics.getNumReadsSeen() > 0 )
@ -373,10 +373,10 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
nSkippedReads,
cumulativeMetrics.getNumReadsSeen(),
100.0 * MathUtils.ratio(nSkippedReads,cumulativeMetrics.getNumReadsSeen())));
for ( Map.Entry<Class, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
for ( Map.Entry<String, Long> filterCounts : cumulativeMetrics.getCountsByFilter().entrySet() ) {
long count = filterCounts.getValue();
logger.info(String.format(" -> %d reads (%.2f%% of total) failing %s",
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), Utils.getClassName(filterCounts.getKey())));
count, 100.0 * MathUtils.ratio(count,cumulativeMetrics.getNumReadsSeen()), filterCounts.getKey()));
}
if ( performanceLog != null ) performanceLog.close();

View File

@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.iterators.PushbackIterator;
import org.broadinstitute.sting.gatk.walkers.DuplicateWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.*;
@ -57,9 +58,9 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
return "dups";
}
private List<SAMRecord> readsAtLoc(final SAMRecord read, PushbackIterator<SAMRecord> iter) {
private List<GATKSAMRecord> readsAtLoc(final GATKSAMRecord read, PushbackIterator<SAMRecord> iter) {
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
ArrayList<SAMRecord> l = new ArrayList<SAMRecord>();
ArrayList<GATKSAMRecord> l = new ArrayList<GATKSAMRecord>();
l.add(read);
for (SAMRecord read2 : iter) {
@ -70,7 +71,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
iter.pushback(read2);
break;
} else {
l.add(read2);
l.add((GATKSAMRecord) read2);
}
}
@ -84,15 +85,15 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param reads the list of reads to split into unique molecular samples
* @return
*/
protected Set<List<SAMRecord>> uniqueReadSets(List<SAMRecord> reads) {
Set<List<SAMRecord>> readSets = new LinkedHashSet<List<SAMRecord>>();
protected Set<List<GATKSAMRecord>> uniqueReadSets(List<GATKSAMRecord> reads) {
Set<List<GATKSAMRecord>> readSets = new LinkedHashSet<List<GATKSAMRecord>>();
// for each read, find duplicates, and either add the read to its duplicate list or start a new one
for ( SAMRecord read : reads ) {
List<SAMRecord> readSet = findDuplicateReads(read, readSets);
for ( GATKSAMRecord read : reads ) {
List<GATKSAMRecord> readSet = findDuplicateReads(read, readSets);
if ( readSet == null ) {
readSets.add(new ArrayList<SAMRecord>(Arrays.asList(read))); // copy so I can add to the list
readSets.add(new ArrayList<GATKSAMRecord>(Arrays.asList(read))); // copy so I can add to the list
} else {
readSet.add(read);
}
@ -110,13 +111,13 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
* @param readSets
* @return The list of duplicate reads that read is a member of, or null if it's the only one of its kind
*/
protected List<SAMRecord> findDuplicateReads(SAMRecord read, Set<List<SAMRecord>> readSets ) {
protected List<GATKSAMRecord> findDuplicateReads(GATKSAMRecord read, Set<List<GATKSAMRecord>> readSets ) {
if ( read.getReadPairedFlag() ) {
// paired
final GenomeLoc readMateLoc = engine.getGenomeLocParser().createGenomeLoc(read.getMateReferenceName(), read.getMateAlignmentStart(), read.getMateAlignmentStart());
for (List<SAMRecord> reads : readSets) {
SAMRecord key = reads.get(0);
for (List<GATKSAMRecord> reads : readSets) {
GATKSAMRecord key = reads.get(0);
// read and key start at the same place, and either the this read and the key
// share a mate location or the read is flagged as a duplicate
@ -131,8 +132,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
}
}
} else {
for (List<SAMRecord> reads : readSets) {
SAMRecord key = reads.get(0);
for (List<GATKSAMRecord> reads : readSets) {
GATKSAMRecord key = reads.get(0);
boolean v = (! key.getReadPairedFlag()) && read.getAlignmentStart() == key.getAlignmentStart() && ( key.getDuplicateReadFlag() || read.getDuplicateReadFlag() ) && read.getReadLength() == key.getReadLength();
//System.out.printf("%s %s %b %b %d %d %d %d => %b%n",
// read.getReadPairedFlag(), key.getReadPairedFlag(), read.getDuplicateReadFlag(), key.getDuplicateReadFlag(),
@ -179,7 +180,7 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
// get the genome loc from the read
GenomeLoc site = engine.getGenomeLocParser().createGenomeLoc(read);
Set<List<SAMRecord>> readSets = uniqueReadSets(readsAtLoc(read, iter));
Set<List<GATKSAMRecord>> readSets = uniqueReadSets(readsAtLoc((GATKSAMRecord) read, iter));
if ( DEBUG ) logger.debug(String.format("*** TraverseDuplicates.traverse at %s with %d read sets", site, readSets.size()));
// Jump forward in the reference to this locus location

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
@ -100,9 +101,9 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
// if the read is mapped, create a metadata tracker
ReadMetaDataTracker tracker = (read.getReferenceIndex() >= 0) ? rodView.getReferenceOrderedDataForRead(read) : null;
final boolean keepMeP = walker.filter(refContext, read);
final boolean keepMeP = walker.filter(refContext, (GATKSAMRecord) read);
if (keepMeP) {
M x = walker.map(refContext, read, tracker); // the tracker can be null
M x = walker.map(refContext, (GATKSAMRecord) read, tracker); // the tracker can be null
sum = walker.reduce(x, sum);
}

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.picard.reference.ReferenceSequence;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.reference.ReferenceSequenceFileFactory;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.StringUtil;
import org.broadinstitute.sting.commandline.Advanced;
import org.broadinstitute.sting.commandline.Argument;
@ -43,6 +42,7 @@ import org.broadinstitute.sting.utils.clipreads.ClippingOp;
import org.broadinstitute.sting.utils.clipreads.ClippingRepresentation;
import org.broadinstitute.sting.utils.clipreads.ReadClipper;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.io.File;
@ -292,11 +292,12 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
/**
* The reads map function.
*
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @param read the read itself, as a GATKSAMRecord
* @return the ReadClipper object describing what should be done to clip this read
*/
public ReadClipperWithData map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public ReadClipperWithData map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
if ( onlyDoRead == null || read.getReadName().equals(onlyDoRead) ) {
if ( clippingRepresentation == ClippingRepresentation.HARDCLIP_BASES ) {
read = ReadUtils.replaceSoftClipsWithMatches(read);
@ -323,7 +324,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/
private void clipSequences(ReadClipperWithData clipper) {
if (sequencesToClip != null) { // don't bother if we don't have any sequences to clip
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (SeqToClip stc : sequencesToClip) {
@ -360,7 +361,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param stop
* @return
*/
private Pair<Integer, Integer> strandAwarePositions(SAMRecord read, int start, int stop) {
private Pair<Integer, Integer> strandAwarePositions(GATKSAMRecord read, int start, int stop) {
if (read.getReadNegativeStrandFlag())
return new Pair<Integer, Integer>(read.getReadLength() - stop - 1, read.getReadLength() - start - 1);
else
@ -374,7 +375,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
*/
private void clipCycles(ReadClipperWithData clipper) {
if (cyclesToClip != null) {
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
for (Pair<Integer, Integer> p : cyclesToClip) { // iterate over each cycle range
@ -416,7 +417,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
* @param clipper
*/
private void clipBadQualityScores(ReadClipperWithData clipper) {
SAMRecord read = clipper.getRead();
GATKSAMRecord read = clipper.getRead();
ClippingData data = clipper.getData();
int readLen = read.getReadBases().length;
byte[] quals = read.getBaseQualities();
@ -458,7 +459,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
if ( clipper == null )
return data;
SAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
GATKSAMRecord clippedRead = clipper.clipRead(clippingRepresentation);
if (outputBam != null) {
outputBam.addAlignment(clippedRead);
} else {
@ -575,7 +576,7 @@ public class ClipReadsWalker extends ReadWalker<ClipReadsWalker.ReadClipperWithD
public class ReadClipperWithData extends ReadClipper {
private ClippingData data;
public ReadClipperWithData(SAMRecord read, List<SeqToClip> clipSeqs) {
public ReadClipperWithData(GATKSAMRecord read, List<SeqToClip> clipSeqs) {
super(read);
data = new ClippingData(clipSeqs);
}

View File

@ -1,10 +1,10 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.List;
import java.util.Set;
@ -20,11 +20,11 @@ import java.util.Set;
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class})
public abstract class DuplicateWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets ) {
public boolean filter(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets ) {
return true; // We are keeping all the reads
}
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<SAMRecord>> readSets );
public abstract MapType map(GenomeLoc loc, AlignmentContext context, Set<List<GATKSAMRecord>> readSets );
// Given result of map function
public abstract ReduceType reduceInit();

View File

@ -33,6 +33,7 @@ import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.File;
@ -71,21 +72,23 @@ public class FindReadsWithNamesWalker extends ReadWalker<SAMRecord, SAMFileWrite
/**
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
return namesToKeep.contains(read.getReadName());
}
/**
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -1,9 +1,9 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.io.PrintStream;
import java.text.DecimalFormat;
@ -119,7 +119,7 @@ public class FlagStatWalker extends ReadWalker<Integer, Integer> {
private FlagStat myStat = new FlagStat();
public Integer map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public Integer map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
myStat.readCount++;
if (read.getReadFailsVendorQualityCheckFlag()) {
myStat.QC_failure++;

View File

@ -17,7 +17,7 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
*/
@By(DataSource.READS)
@Requires({DataSource.READS,DataSource.REFERENCE, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.INTERVAL)
@PartitionBy(PartitionType.LOCUS)
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public abstract class LocusWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
// Do we actually want to operate on the context?

View File

@ -34,6 +34,12 @@ public enum PartitionType {
*/
NONE,
/**
* The walker inputs can be chunked down to individual
* reads.
*/
READ,
/**
* The walker inputs can be chunked down to the
* per-locus level.

View File

@ -40,6 +40,7 @@ import java.util.TreeSet;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Renders, in SAM/BAM format, all reads from the input data set in the order in which they appear in the input file.
@ -136,11 +137,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/**
* The reads filter function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return true if the read passes the filter, false if it doesn't
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// check the read group
if ( readGroup != null ) {
SAMReadGroupRecord myReadGroup = read.getReadGroup();
@ -180,11 +182,12 @@ public class PrintReadsWalker extends ReadWalker<SAMRecord, SAMFileWriter> {
/**
* The reads map function.
*
* @param ref the reference bases that correspond to our read, if a reference was provided
* @param read the read itself, as a SAMRecord
* @return the read itself
*/
public SAMRecord map( ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker ) {
public SAMRecord map( ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker ) {
return read;
}

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/**
* Created by IntelliJ IDEA.
@ -12,7 +13,7 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* To change this template use File | Settings | File Templates.
*/
@Requires({DataSource.READS, DataSource.REFERENCE_BASES})
@PartitionBy(PartitionType.CONTIG)
@PartitionBy(PartitionType.READ)
public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, ReduceType> {
public boolean requiresOrderedReads() { return false; }
@ -20,11 +21,11 @@ public abstract class ReadWalker<MapType, ReduceType> extends Walker<MapType, Re
/** Must return true for reads that need to be processed. Reads, for which this method return false will
* be skipped by the engine and never passed to the walker.
*/
public boolean filter(ReferenceContext ref, SAMRecord read) {
public boolean filter(ReferenceContext ref, GATKSAMRecord read) {
// We are keeping all the reads
return true;
}
// Map over the org.broadinstitute.sting.gatk.contexts.AlignmentContext
public abstract MapType map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker);
public abstract MapType map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker);
}

View File

@ -33,6 +33,7 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.ArrayList;
@ -60,7 +61,7 @@ public class SplitSamFileWalker extends ReadWalker<SAMRecord, Map<String, SAMFil
logger.info("SplitSamFile version: " + VERSION);
}
public SAMRecord map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker metaDataTracker) {
public SAMRecord map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
return read;
}

View File

@ -25,15 +25,17 @@
package org.broadinstitute.sting.gatk.walkers;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List;
@ -77,6 +79,23 @@ public abstract class Walker<MapType, ReduceType> {
return toolkit;
}
/**
* Gets the master sequence dictionary for this walker
* @link GenomeAnalysisEngine.getMasterSequenceDictionary
* @return
*/
protected SAMSequenceDictionary getMasterSequenceDictionary() {
return getToolkit().getMasterSequenceDictionary();
}
protected SampleDB getSampleDB() {
return getToolkit().getSampleDB();
}
protected Sample getSample(final String id) {
return getToolkit().getSampleDB().getSample(id);
}
/**
* (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus.

View File

@ -92,7 +92,7 @@ public class AlleleBalance extends InfoFieldAnnotation {
continue;
}
// todo -- actually care about indel length from the pileup (agnostic at the moment)
int refCount = indelPileup.size();
int refCount = indelPileup.getNumberOfElements();
int altCount = vc.isSimpleInsertion() ? indelPileup.getNumberOfInsertions() : indelPileup.getNumberOfDeletions();
if ( refCount + altCount == 0 ) {

View File

@ -47,7 +47,7 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
if (!g.isHet())
return null;
Set<Allele> altAlleles = vc.getAlternateAlleles();
Collection<Allele> altAlleles = vc.getAlternateAlleles();
if ( altAlleles.size() == 0 )
return null;

Some files were not shown because too many files have changed in this diff Show More