Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Eric Banks 2011-10-06 13:41:45 -04:00
commit 1b0735f0a3
67 changed files with 2047 additions and 2149 deletions

View File

@ -146,12 +146,14 @@
<mkdir dir="${lib.dir}"/>
<mkdir dir="${ivy.jar.dir}"/>
<!-- Comment out the following two lines to build the GATK without a network connection, assuming you have all of the libraries cached already -->
<get src="http://repo1.maven.org/maven2/org/apache/ivy/ivy/${ivy.install.version}/${ivy.jar.file}"
dest="${ivy.jar.dir}/${ivy.jar.file}"
usetimestamp="true"/>
<taskdef resource="org/apache/ivy/ant/antlib.xml"
uri="antlib:org.apache.ivy.ant"
classpath="${ivy.jar.dir}/${ivy.jar.file}"/>
<ivy:settings file="${ivy.settings.dir}/ivysettings.xml"/>
<property name="init.resolve.done" value="true"/>
</target>
@ -546,8 +548,9 @@
<target name="vcf.jar" depends="gatk.compile,init.jar">
<jar jarfile="${dist.dir}/vcf.jar">
<fileset dir="${java.classes}">
<include name="**/utils/codecs/**/*.class"/>
<include name="**/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/utils/codecs/vcf/**/*.class"/>
<include name="org/broadinstitute/sting/utils/variantcontext/**/*.class"/>
<include name="org/broadinstitute/sting/gatk/refdata/SelfScopingFeatureCodec.class"/>
<include name="org/broadinstitute/sting/utils/exceptions/**"/>
<include name="org/broadinstitute/sting/utils/help/DocumentedGATKFeature.class"/>
</fileset>
@ -914,7 +917,7 @@
</target>
<!-- Our four different test conditions: Test, IntegrationTest, PerformanceTest, PipelineTest -->
<target name="test" depends="init.buildall,test.compile" description="Run unit tests">
<target name="test" depends="init.buildall,test.compile,vcf.jar" description="Run unit tests">
<condition property="ttype" value="*UnitTest" else="${single}">
<not><isset property="single"/></not>
</condition>

View File

@ -99,5 +99,5 @@ gsa.read.gatkreport <- function(filename) {
.gsa.assignGATKTableToEnvironment(tableName, tableHeader, tableRows, tableEnv);
}
gatkreport = as.list(tableEnv);
gatkreport = as.list(tableEnv, all.names=TRUE);
}

View File

@ -34,28 +34,23 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.ReadGroupBlackListFilter;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.stubs.Stub;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.RMDIntervalGenerator;
import org.broadinstitute.sting.gatk.refdata.utils.RMDTriplet;
import org.broadinstitute.sting.gatk.samples.SampleDBBuilder;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SequenceDictionaryUtils;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.io.File;
import java.util.*;
@ -92,7 +87,7 @@ public class GenomeAnalysisEngine {
/**
* Accessor for sample metadata
*/
private SampleDataSource sampleDataSource = null;
private SampleDB sampleDB = null;
/**
* Accessor for sharded reference-ordered data.
@ -206,6 +201,9 @@ public class GenomeAnalysisEngine {
// Prepare the data for traversal.
initializeDataSources();
// initialize sampleDB
initializeSampleDB();
// initialize and validate the interval list
initializeIntervals();
validateSuppliedIntervals();
@ -692,12 +690,22 @@ public class GenomeAnalysisEngine {
for (ReadFilter filter : filters)
filter.initialize(this);
sampleDataSource = new SampleDataSource(getSAMFileHeader(), argCollection.sampleFiles);
// set the sequence dictionary of all of the Tribble tracks to the sequence dictionary of our reference
rodDataSources = getReferenceOrderedDataSources(referenceMetaDataFiles,referenceDataSource.getReference().getSequenceDictionary(),genomeLocParser,argCollection.unsafe);
}
/**
* Entry-point function to initialize the samples database from input data and pedigree arguments
*/
private void initializeSampleDB() {
SampleDBBuilder sampleDBBuilder = new SampleDBBuilder(this, argCollection.pedigreeValidationType);
// Samples referenced by the read data (BAM read group records)
sampleDBBuilder.addSamplesFromSAMHeader(getSAMFileHeader());
// Samples referenced only by variant / reference-ordered-data inputs
sampleDBBuilder.addSamplesFromSampleNames(SampleUtils.getUniqueSamplesFromRods(this));
// Pedigree metadata supplied on the command line (-ped files and -pedString records)
sampleDBBuilder.addSamplesFromPedigreeFiles(argCollection.pedigreeFiles);
sampleDBBuilder.addSamplesFromPedigreeStrings(argCollection.pedigreeStrings);
// Finalize and validate the assembled database (strictness per pedigreeValidationType)
sampleDB = sampleDBBuilder.getFinalSampleDB();
}
/**
* Gets a unique identifier for the reader sourcing this read.
* @param read Read to examine.
@ -716,100 +724,6 @@ public class GenomeAnalysisEngine {
return getReadsDataSource().getSAMFile(id);
}
/**
* Returns sets of samples present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of sample names
* found in the corresponding bam file.
*
* @return Sets of samples in the merged input SAM stream, grouped by readers
*/
public List<Set<String>> getSamplesByReaders() {
    // Build one sample-name set per input reader, preserving reader order.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> samplesPerReader = new ArrayList<Set<String>>(readerIDs.size());
    for (final SAMReaderID readerID : readerIDs) {
        final Set<String> samplesInReader = new HashSet<String>(1);
        for (final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups()) {
            samplesInReader.add(readGroup.getSample());
        }
        samplesPerReader.add(samplesInReader);
    }
    return samplesPerReader;
}
/**
* Returns sets of libraries present in the (merged) input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of library names
* found in the corresponding bam file.
*
* @return Sets of libraries present in the (merged) input SAM stream, grouped by readers
*/
public List<Set<String>> getLibrariesByReaders() {
    // Build one library-name set per input reader, preserving reader order.
    final Collection<SAMReaderID> readerIDs = getReadsDataSource().getReaderIDs();
    final List<Set<String>> librariesPerReader = new ArrayList<Set<String>>(readerIDs.size());
    for (final SAMReaderID readerID : readerIDs) {
        final Set<String> librariesInReader = new HashSet<String>(2);
        for (final SAMReadGroupRecord readGroup : getReadsDataSource().getHeader(readerID).getReadGroups()) {
            librariesInReader.add(readGroup.getLibrary());
        }
        librariesPerReader.add(librariesInReader);
    }
    return librariesPerReader;
}
/**
* **** UNLESS YOU HAVE GOOD REASON TO, DO NOT USE THIS METHOD; USE getFileToReadGroupIdMapping() INSTEAD ****
*
* Returns sets of (remapped) read groups in input SAM stream, grouped by readers (i.e. underlying
* individual bam files). For instance: if GATK is run with three input bam files (three -I arguments), then the list
* returned by this method will contain 3 elements (one for each reader), with each element being a set of remapped read groups
* (i.e. as seen by read.getReadGroup().getReadGroupId() in the merged stream) that come from the corresponding bam file.
*
* @return sets of (merged) read group ids in order of input bams
*/
public List<Set<String>> getMergedReadGroupsByReaders() {
    final Collection<SAMReaderID> readers = getReadsDataSource().getReaderIDs();
    final List<Set<String>> rg_sets = new ArrayList<Set<String>>(readers.size());

    // Hoisted out of the loops: whether the merged header remapped colliding read
    // group ids is a property of the whole data source, not of any single read group,
    // so there is no need to re-query it on every inner-loop iteration.
    final boolean hasCollisions = getReadsDataSource().hasReadGroupCollisions();

    for (final SAMReaderID r : readers) {
        final Set<String> groups = new HashSet<String>(5);
        rg_sets.add(groups);
        for (final SAMReadGroupRecord g : getReadsDataSource().getHeader(r).getReadGroups()) {
            if (hasCollisions) {
                // use HeaderMerger to translate the original read group id from the reader
                // into the read group id seen in the merged stream, and record that
                // remapped id so it can be associated with this specific reader
                groups.add(getReadsDataSource().getReadGroupId(r, g.getReadGroupId()));
            } else {
                // otherwise, pass through the unmapped read groups since this is what Picard does as well
                groups.add(g.getReadGroupId());
            }
        }
    }
    return rg_sets;
}
/**
* Now that all files are open, validate that the sequence dictionaries of the reads, the reference, and the reference-ordered data (if available) are consistent.
*
@ -929,6 +843,18 @@ public class GenomeAnalysisEngine {
return readsDataSource.getHeader(reader);
}
/**
* Returns an ordered list of the unmerged SAM file headers known to this engine.
* @return list of header for each input SAM file, in command line order
*/
public List<SAMFileHeader> getSAMFileHeaders() {
    // Collect the unmerged header of every input SAM file, in command-line order.
    final List<SAMFileHeader> allHeaders = new ArrayList<SAMFileHeader>();
    for (final SAMReaderID readerID : getReadsDataSource().getReaderIDs()) {
        allHeaders.add(getReadsDataSource().getHeader(readerID));
    }
    return allHeaders;
}
/**
* Gets the master sequence dictionary for this GATK engine instance
* @return a never-null dictionary listing all of the contigs known to this engine instance
@ -947,8 +873,6 @@ public class GenomeAnalysisEngine {
return this.readsDataSource;
}
/**
* Sets the collection of GATK main application arguments.
*
@ -1035,140 +959,14 @@ public class GenomeAnalysisEngine {
return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
}
// Accessor for the engine-owned sample metadata store.
public SampleDataSource getSampleMetadata() {
return this.sampleDataSource;
}
// -------------------------------------------------------------------------------------
//
// code for working with Samples database
//
// -------------------------------------------------------------------------------------
/**
* Get a sample by its ID
* If an alias is passed in, return the main sample object
* @param id sample id
* @return sample Object with this ID
*/
public Sample getSampleById(String id) {
// Thin delegate; the data source resolves aliases to the main sample object.
return sampleDataSource.getSampleById(id);
}
/**
* Get the sample for a given read group
* Must first look up ID for read group
* @param readGroup of sample
* @return sample object with ID from the read group
*/
public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
// Thin delegate; the data source maps the read group's sample id to a Sample.
return sampleDataSource.getSampleByReadGroup(readGroup);
}
/**
* Get a sample for a given read
* Must first look up read group, and then sample ID for that read group
* @param read of sample
* @return sample object of this read
*/
public Sample getSampleByRead(SAMRecord read) {
// Convenience wrapper: resolve the read's read group, then its sample.
// NOTE(review): assumes read.getReadGroup() is non-null or handled by the delegate — confirm.
return getSampleByReadGroup(read.getReadGroup());
}
/**
* Get number of sample objects
* @return size of samples map
*/
public int sampleCount() {
// Thin delegate to the sample metadata store.
return sampleDataSource.sampleCount();
}
/**
* Return all samples with a given family ID
* Note that this isn't terribly efficient (linear) - it may be worth adding a new family ID data structure for this
* @param familyId family ID
* @return Samples with the given family ID
*/
public Set<Sample> getFamily(String familyId) {
// Thin delegate; linear scan inside the data source (see efficiency note above).
return sampleDataSource.getFamily(familyId);
}
/**
* Returns all children of a given sample
* See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
* @param sample parent sample
* @return children of the given sample
*/
public Set<Sample> getChildren(Sample sample) {
// Thin delegate; inherits getFamily()'s linear cost.
return sampleDataSource.getChildren(sample);
}
/**
* Gets all the samples
* @return
*/
public Collection<Sample> getSamples() {
// Thin delegate: all samples known to the data source.
return sampleDataSource.getSamples();
}
/**
* Takes a list of sample names and returns their corresponding sample objects
*
* @param sampleNameList List of sample names
* @return Corresponding set of samples
*/
public Set<Sample> getSamples(Collection<String> sampleNameList) {
// Thin delegate: bulk name -> Sample lookup.
return sampleDataSource.getSamples(sampleNameList);
}
/**
* Returns a set of samples that have any value (which could be null) for a given property
* @param key Property key
* @return Set of samples with the property
*/
public Set<Sample> getSamplesWithProperty(String key) {
// Thin delegate; matches samples that define the key, even with a null value.
return sampleDataSource.getSamplesWithProperty(key);
}
/**
* Returns a set of samples that have a property with a certain value
* Value must be a string for now - could add a similar method for matching any objects in the future
*
* @param key Property key
* @param value String property value
* @return Set of samples that match key and value
*/
public Set<Sample> getSamplesWithProperty(String key, String value) {
// Thin delegate; string-valued property match only (see javadoc above).
return sampleDataSource.getSamplesWithProperty(key, value);
}
/**
* Returns a set of sample objects for the sample names in a variant context
*
* @param context Any variant context
* @return a set of the sample objects
*/
public Set<Sample> getSamplesByVariantContext(VariantContext context) {
    // Map every sample name carried by the variant context onto its Sample object,
    // creating placeholder samples for names the database has not seen before.
    final Set<Sample> result = new HashSet<Sample>();
    for (final String sampleName : context.getSampleNames()) {
        result.add(sampleDataSource.getOrCreateSample(sampleName));
    }
    return result;
}
/**
* Returns all samples that were referenced in the SAM file
*/
public Set<Sample> getSAMFileSamples() {
// Thin delegate: samples that appeared in the SAM/BAM header(s).
return sampleDataSource.getSAMFileSamples();
}
/**
* Return a subcontext restricted to samples with a given property key/value
* Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering
* @param context VariantContext to filter
* @param key property key
* @param value property value (must be string)
* @return subcontext
*/
public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
return sampleDataSource.subContextFromSampleProperty(context, key, value);
public SampleDB getSampleDB() {
return this.sampleDB;
}
public Map<String,String> getApproximateCommandLineArguments(Object... argumentProviders) {

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
@ -44,10 +45,7 @@ import org.simpleframework.xml.stream.HyphenStyle;
import java.io.File;
import java.io.InputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.*;
/**
* @author aaron
@ -72,11 +70,6 @@ public class GATKArgumentCollection {
@Input(fullName = "input_file", shortName = "I", doc = "SAM or BAM file(s)", required = false)
public List<String> samFiles = new ArrayList<String>();
// parameters and their defaults
@ElementList(required = false)
@Argument(fullName = "sample_metadata", shortName = "SM", doc = "Sample file(s) in JSON format", required = false)
public List<File> sampleFiles = new ArrayList<File>();
@Element(required = false)
@Argument(fullName = "read_buffer_size", shortName = "rbs", doc="Number of reads per SAM file to buffer in memory", required = false)
public Integer readBufferSize = null;
@ -215,28 +208,91 @@ public class GATKArgumentCollection {
// --------------------------------------------------------------------------------------------------------------
//
// distributed GATK arguments
// PED (pedigree) support
//
// --------------------------------------------------------------------------------------------------------------
@Element(required=false)
@Argument(fullName="processingTracker",shortName="C",doc="A lockable, shared file for coordinating distributed GATK runs",required=false)
@Hidden
public File processingTrackerFile = null;
@Element(required=false)
@Argument(fullName="restartProcessingTracker",shortName="RPT",doc="Should we delete the processing tracker file at startup?",required=false)
@Hidden
public boolean restartProcessingTracker = false;
/**
* <p>Reads PED file-formatted tabular text files describing meta-data about the samples being
* processed in the GATK.</p>
*
* <ul>
* <li>see <a href="http://www.broadinstitute.org/mpg/tagger/faq.html">http://www.broadinstitute.org/mpg/tagger/faq.html</a></li>
* <li>see <a href="http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped">http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped</a></li>
* </ul>
*
* <p>The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:</p>
*
* <ul>
* <li>Family ID</li>
* <li>Individual ID</li>
* <li>Paternal ID</li>
* <li>Maternal ID</li>
* <li>Sex (1=male; 2=female; other=unknown)</li>
* <li>Phenotype</li>
* </ul>
*
* <p>The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: GATK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).</p>
*
* <p>If an individual's sex is unknown, then any character other than 1 or 2 can be used.</p>
*
* <p>You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.</p>
*
* <p>Affection status should be coded:</p>
*
* <ul>
* <li>-9 missing</li>
* <li>0 missing</li>
* <li>1 unaffected</li>
* <li>2 affected</li>
* </ul>
*
* <p>If any value outside of -9,0,1,2 is detected, the phenotype values are
* interpreted as string phenotype values. In this case -9 uniquely
* represents the missing value.</p>
*
* <p>Genotypes (column 7 onwards) cannot be specified to the GATK.</p>
*
* <p>For example, here are two individuals (one row = one person):</p>
*
* <pre>
* FAM001 1 0 0 1 2
* FAM001 2 0 0 1 2
* </pre>
*
* <p>Each -ped argument can be tagged with NO_FAMILY_ID, NO_PARENTS, NO_SEX, NO_PHENOTYPE to
* tell the GATK PED parser that the corresponding fields are missing from the ped file.</p>
*
* <p>Note that most GATK walkers do not use pedigree information. Walkers that require pedigree
* data should clearly indicate so in their arguments and will throw errors if required pedigree
* information is missing.</p>
*/
@Argument(fullName="pedigree", shortName = "ped", doc="Pedigree files for samples",required=false)
public List<File> pedigreeFiles = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerStatusFile",shortName="CSF",doc="If provided, a detailed accounting of the state of the process tracker is written to this file. For debugging, only",required=false)
@Hidden
public File processingTrackerStatusFile = null;
/**
* Inline PED records (see -ped argument). Each -pedString STRING can contain one or more
* valid PED records (see -ped) separated by semi-colons. Supports all tags for each pedString
* as -ped supports
*/
@Argument(fullName="pedigreeString", shortName = "pedString", doc="Pedigree string for samples",required=false)
public List<String> pedigreeStrings = Collections.emptyList();
@Element(required=false)
@Argument(fullName="processingTrackerID",shortName="CID",doc="If provided, an integer ID (starting at 1) indicating a unique id for this process within the distributed GATK group",required=false)
@Hidden
public int processTrackerID = -1;
/**
* How strict should we be in parsing the PED files?
*/
@Argument(fullName="pedigreeValidationType", shortName = "pedValidationType", doc="How strict should we be in validating the pedigree information?",required=false)
public PedigreeValidationType pedigreeValidationType = PedigreeValidationType.STRICT;
// --------------------------------------------------------------------------------------------------------------
//
// BAM indexing and sharding arguments
//
// --------------------------------------------------------------------------------------------------------------
@Element(required = false)
@Argument(fullName="allow_intervals_with_unindexed_bam",doc="Allow interval processing with an unsupported BAM. NO INTEGRATION TESTS are available. Use at your own risk.",required=false)
@ -387,7 +443,7 @@ public class GATKArgumentCollection {
return false;
}
if ((other.RODToInterval == null && RODToInterval != null) ||
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
(other.RODToInterval != null && !other.RODToInterval.equals(RODToInterval))) {
return false;
}
@ -405,20 +461,6 @@ public class GATKArgumentCollection {
(other.performanceLog != null && !other.performanceLog.equals(this.performanceLog)))
return false;
if ((other.processingTrackerFile == null && this.processingTrackerFile != null) ||
(other.processingTrackerFile != null && !other.processingTrackerFile.equals(this.processingTrackerFile)))
return false;
if ((other.processingTrackerStatusFile == null && this.processingTrackerStatusFile != null) ||
(other.processingTrackerStatusFile != null && !other.processingTrackerStatusFile.equals(this.processingTrackerStatusFile)))
return false;
if ( restartProcessingTracker != other.restartProcessingTracker )
return false;
if ( processTrackerID != other.processTrackerID )
return false;
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false;

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.contexts;
import net.sf.samtools.SAMReadGroupRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -76,14 +75,6 @@ public class AlignmentContextUtils {
return splitContextBySampleName(context, null);
}
public static Map<Sample, AlignmentContext> splitContextBySample(AlignmentContext context) {
Map<Sample, AlignmentContext> m = new HashMap<Sample, AlignmentContext>();
for ( Map.Entry<String, AlignmentContext> entry : splitContextBySampleName(context, null).entrySet() ) {
m.put(new Sample(entry.getKey()), entry.getValue());
}
return m;
}
/**
* Splits the given AlignmentContext into a StratifiedAlignmentContext per sample, but referencd by sample name instead
* of sample object.
@ -97,8 +88,8 @@ public class AlignmentContextUtils {
GenomeLoc loc = context.getLocation();
HashMap<String, AlignmentContext> contexts = new HashMap<String, AlignmentContext>();
for(String sample: context.getPileup().getSampleNames()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSampleName(sample);
for(String sample: context.getPileup().getSamples()) {
ReadBackedPileup pileupBySample = context.getPileup().getPileupForSample(sample);
// Don't add empty pileups to the split context.
if(pileupBySample.size() == 0)

View File

@ -1,30 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 2:09:16 PM
*/
/**
 * Bean describing one sample property definition: a property name together
 * with the values it may take. Instantiated by the YAML sample-file parser
 * (presumably via SnakeYAML's bean mapping — see SampleDataSource).
 */
public class PropertyDefinition {

    String property;  // property name
    String[] values;  // values this property may take

    /** @return the property name */
    public String getProperty() {
        return property;
    }

    /** @param property the property name */
    public void setProperty(String property) {
        this.property = property;
    }

    /** @return the values this property may take */
    public String[] getValues() {
        return values;
    }

    /** @param values the values this property may take */
    public void setValues(String[] values) {
        this.values = values;
    }
}

View File

@ -1,203 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:31:38 PM
*/
/**
 * Metadata for a single sample: an id plus arbitrary properties (gender,
 * population, family id, ...) and named relationships to other samples
 * (mother, father). Instances are managed by SampleDataSource.
 */
public class Sample implements java.io.Serializable {

    /** Unique sample id; a null id is tolerated (SampleDataSource registers a null-keyed placeholder). */
    private final String id;

    private boolean hasSampleFileEntry = false; // true if this sample has an entry in a sample file
    private boolean hasSAMFileEntry = false;    // true if this sample has an entry in the SAM file

    /** Free-form key -> value metadata; "gender" and "population" keys are type-checked on set. */
    private HashMap<String, Object> properties = new HashMap<String, Object>();

    /** Named links to other samples, e.g. "mother" and "father". */
    private HashMap<String, Sample> relationships = new HashMap<String, Sample>();

    public enum Gender {
        MALE,
        FEMALE,
        UNKNOWN
    }

    public Sample(String id) {
        this.id = id;
    }

    public String getId() {
        return this.id;
    }

    public Map<String, Object> getProperties() {
        return properties;
    }

    /**
     * Replaces this sample's properties with a copy of the given map.
     * Fixed: the previous raw (HashMap) cast threw ClassCastException for any
     * non-HashMap Map (e.g. TreeMap); copying accepts every Map implementation
     * and also prevents external mutation from leaking into this sample.
     */
    public void setProperties(Map<String, Object> properties) {
        this.properties = new HashMap<String, Object>(properties);
    }

    /** @return an unmodifiable view of this sample's relationships */
    public Map<String, Sample> getRelationships() {
        return Collections.unmodifiableMap(this.relationships);
    }

    public void setSampleFileEntry(boolean value) {
        this.hasSampleFileEntry = value;
    }

    public boolean hasSAMFileEntry() {
        return this.hasSAMFileEntry;
    }

    public void setSAMFileEntry(boolean value) {
        this.hasSAMFileEntry = value;
    }

    public boolean hasSampleFileEntry() {
        return this.hasSampleFileEntry;
    }

    /**
     * Get one property
     * @param key key of property
     * @return value of property as generic object, or null if absent
     */
    public Object getProperty(String key) {
        return properties.get(key);
    }

    /**
     * Set a property; if the property already exists, it is overwritten.
     * @param key key of property
     * @param value object to be stored in the properties map
     * @throws StingException if key is already used as a relationship, or if a
     *         "gender"/"population" value has the wrong type
     */
    public void setProperty(String key, Object value) {
        if (relationships.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        if (key.equals("gender") && value.getClass() != Gender.class) {
            throw new StingException("'gender' property must be of type Sample.Gender");
        }
        if (key.equals("population") && value.getClass() != String.class) {
            throw new StingException("'population' property must be of type String");
        }
        properties.put(key, value);
    }

    /**
     * Get one relationship
     * @param key of relationship
     * @return Sample object that this relationship points to, or null
     */
    public Sample getRelationship(String key) {
        return relationships.get(key);
    }

    /**
     * Set one relationship; if already set, it is overwritten.
     * @param key key of the relationship
     * @param value Sample object this relationship points to
     * @throws StingException if key is already used as a property
     */
    public void setRelationship(String key, Sample value) {
        if (properties.containsKey(key)) {
            throw new StingException("The same key cannot exist as a property and a relationship");
        }
        relationships.put(key, value);
    }

    /** @return the sample's mother, or null if not set */
    public Sample getMother() {
        return getRelationship("mother");
    }

    /** @return the sample's father, or null if not set */
    public Sample getFather() {
        return getRelationship("father");
    }

    /** @return the value of the "gender" property (of type Gender), or null */
    public Gender getGender() {
        return (Gender) properties.get("gender");
    }

    public String getPopulation() {
        return (String) properties.get("population");
    }

    public String getFamilyId() {
        return (String) properties.get("familyId");
    }

    /**
     * @return True if sample is male, false if female, unknown, or null
     */
    public boolean isMale() {
        return properties.get("gender") == Gender.MALE;
    }

    /**
     * @return True if sample is female, false if male, unknown or null
     * Fixed: this previously compared against Gender.MALE and therefore always
     * returned the same result as isMale().
     */
    public boolean isFemale() {
        return properties.get("gender") == Gender.FEMALE;
    }

    /**
     * @param key property key
     * @return true if sample has this property (even if its value is null)
     */
    public boolean hasProperty(String key) {
        return properties.containsKey(key);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;

        Sample sample = (Sample) o;

        if (hasSAMFileEntry != sample.hasSAMFileEntry) return false;
        if (hasSampleFileEntry != sample.hasSampleFileEntry) return false;
        if (id != null ? !id.equals(sample.id) : sample.id != null) return false;
        if (properties != null ? !properties.equals(sample.properties) : sample.properties != null) return false;
        if (relationships != null ? !relationships.equals(sample.relationships) : sample.relationships != null)
            return false;

        return true;
    }

    @Override
    public int hashCode() {
        // "".hashCode() is 0; used as the hash for the null-id placeholder sample.
        return id != null ? id.hashCode() : "".hashCode();
    }
}

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 5:13:46 PM
*/
/**
 * Bean pairing a primary ("main") sample id with the alternate ids that refer
 * to the same sample in other datasets. Instantiated by the YAML sample-file
 * parser (presumably via SnakeYAML's bean mapping — see SampleDataSource).
 */
public class SampleAlias {

    String mainId;     // the primary sample id
    String[] otherIds; // alias ids that resolve to mainId

    /** @return the primary sample id */
    public String getMainId() {
        return mainId;
    }

    /** @param mainId the primary sample id */
    public void setMainId(String mainId) {
        this.mainId = mainId;
    }

    /** @return the alias ids for this sample */
    public String[] getOtherIds() {
        return otherIds;
    }

    /** @param otherIds the alias ids for this sample */
    public void setOtherIds(String[] otherIds) {
        this.otherIds = otherIds;
    }
}

View File

@ -1,590 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.yaml.snakeyaml.TypeDescription;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.constructor.Constructor;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Jul 26, 2010
* Time: 3:30:09 PM
*
* This class stores and manages sample metadata. This data is encoded in a sample file, which can be included
* in the GATK by the "--samples" argument. This class reads and parses those files.
*
* Although there are a set of public methods for accessing sample data, they aren't used by walkers - they are really
* only used by GenomeAnalysisEngine. An instance of GenomeAnalysisEngine has one SampleDataSource. When a walker
* wants to access sample data, it asks GenomeAnalysis to fetch this data from its SampleDataSource.
*
*/
public class SampleDataSource {

    /**
     * SAMFileHeader that has been created for this analysis.
     */
    private SAMFileHeader header;

    /**
     * Primary storage for sample data.  Samples are keyed by their unique ID string,
     * so lookup by ID is constant time.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Samples can have "aliases", because sometimes the same sample is referenced by different IDs in different
     * datasets. If this is the case, one ID is the "primary ID" and others are "aliases".
     *
     * This maps ID => primary ID for all sample ID strings - both primary IDs and aliases.
     *
     * NOTE(review): addFile() validates aliases via validateAliases() but never inserts them into
     * this map, so aliasFilter() is currently an identity mapping - confirm whether alias
     * resolution was intended to be wired up here.
     */
    private HashMap<String, String> sampleAliases = new HashMap<String, String>();

    /**
     * Property keys that are always allowed while loading sample files, even when the file
     * declares an "allowedProperties" restriction.
     */
    public static final String[] specialProperties = new String[] {"familyId", "population", "gender"};

    /**
     * Relationship keys that are always allowed while loading sample files, even when the file
     * declares an "allowedRelationships" restriction.
     */
    public static final String[] specialRelationships = new String[] {"mother", "father"};

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     * @param header SAMFileHeader that has been created for this analysis
     * @param sampleFiles Sample files that were included on the command line; may be null
     */
    public SampleDataSource(SAMFileHeader header, List<File> sampleFiles) {
        this();
        this.header = header;
        // create an empty sample object for each sample referenced in the SAM header
        addSamplesFromSAMHeader(header);
        // add files consecutively; later files may extend samples created by earlier ones
        if (sampleFiles != null) {
            for (File file : sampleFiles) {
                addFile(file);
            }
        }
    }

    /**
     * Creates a data source holding only the "null" sample, which acts as a catch-all
     * for records that carry no sample information.
     */
    public SampleDataSource() {
        samples.put(null, new Sample(null));
    }

    /**
     * Hallucinates sample objects for all the samples in the SAM file and stores them.
     * Samples already known to this data source are left untouched.
     * @param header SAM header to pull sample names from
     */
    public void addSamplesFromSAMHeader(SAMFileHeader header) {
        for (String sampleName : SampleUtils.getSAMFileSamples(header)) {
            if (!hasSample(sampleName)) {
                Sample newSample = new Sample(sampleName);
                newSample.setSAMFileEntry(true);
                samples.put(sampleName, newSample);
            }
        }
    }

    /**
     * Parse one sample file and integrate it with samples that are already there.
     * Fails quickly if any errors are found in the file.
     * @param sampleFile YAML sample file to load
     */
    public void addFile(File sampleFile) {
        BufferedReader reader;
        try {
            reader = new BufferedReader(new FileReader(sampleFile));
        }
        catch (IOException e) {
            throw new StingException("Could not open sample file " + sampleFile.getAbsolutePath(), e);
        }

        // SampleFileParser stores an object representation of a sample file - this is what we'll parse
        SampleFileParser parser;
        try {
            // set up YAML reader - a "Constructor" creates java objects from YAML and a "Yaml" loads the stream
            Constructor con = new Constructor(SampleFileParser.class);
            TypeDescription desc = new TypeDescription(SampleFileParser.class);
            desc.putListPropertyType("propertyDefinitions", PropertyDefinition.class);
            desc.putListPropertyType("sampleAliases", SampleAlias.class);
            con.addTypeDescription(desc);
            Yaml yaml = new Yaml(con);
            try {
                parser = (SampleFileParser) yaml.load(reader);
            }
            catch (Exception e) {
                throw new StingException("There was a syntactic error with the YAML in sample file " + sampleFile.getAbsolutePath(), e);
            }
        }
        finally {
            // always release the file handle, even when parsing fails
            try {
                reader.close();
            }
            catch (IOException e) {
                // best effort - the data was already read (or the parse already failed)
            }
        }

        // check to see which validation options were built into the file
        boolean restrictProperties = parser.getAllowedProperties() != null;
        boolean restrictRelationships = parser.getAllowedRelationships() != null;
        boolean restrictPropertyValues = parser.getPropertyDefinitions() != null;

        // propertyValues stores the values that are allowed for a given property
        HashMap<String, HashSet> propertyValues = null;
        if (restrictPropertyValues) {
            propertyValues = new HashMap<String, HashSet>();
            for (PropertyDefinition def : parser.getPropertyDefinitions()) {
                HashSet<String> set = new HashSet<String>();
                for (String value : def.getValues()) {
                    set.add(value);
                }
                propertyValues.put(def.getProperty(), set);
            }
        }

        // make sure the aliases are valid
        validateAliases(parser);

        // loop through each sample in the file - a SampleParser stores an object that will become a Sample
        for (SampleParser sampleParser : parser.getSamples()) {
            try {
                // step 1: add the sample if it doesn't already exist
                Sample sample = getSampleById(sampleParser.getId());
                if (sample == null) {
                    sample = new Sample(sampleParser.getId());
                }
                addSample(sample);
                sample.setSampleFileEntry(true);

                // step 2: add the properties
                if (sampleParser.getProperties() != null) {
                    for (String property : sampleParser.getProperties().keySet()) {
                        // check that the property is allowed
                        if (restrictProperties) {
                            if (!isPropertyValid(property, parser.getAllowedProperties())) {
                                throw new StingException(property + " is an invalid property. It is not included in the list " +
                                        "of allowed properties.");
                            }
                        }
                        // next check that the value is allowed
                        if (restrictPropertyValues) {
                            if (!isValueAllowed(property, sampleParser.getProperties().get(property), propertyValues)) {
                                throw new StingException("The value of property '" + property + "' is invalid. " +
                                        "It is not included in the list of allowed values for this property.");
                            }
                        }
                        // next check that there isn't already a conflicting property there;
                        // compare with equals() - reference comparison would falsely flag equal values
                        if (sample.getProperty(property) != null &&
                                !sample.getProperty(property).equals(sampleParser.getProperties().get(property)))
                        {
                            throw new StingException(property + " is a conflicting property!");
                        }
                        // checks are passed - now add the property!
                        saveProperty(sample, property, sampleParser.getProperties().get(property));
                    }
                }

                // step 3: add the relationships
                if (sampleParser.getRelationships() != null) {
                    for (String relationship : sampleParser.getRelationships().keySet()) {
                        String relativeId = sampleParser.getRelationships().get(relationship);
                        if (relativeId == null) {
                            throw new StingException("The relationship cannot be null");
                        }
                        // first check that it's not invalid
                        if (restrictRelationships) {
                            if (!isRelationshipValid(relationship, parser.getAllowedRelationships())) {
                                throw new StingException(relationship + " is an invalid relationship");
                            }
                        }
                        // next check that there isn't already a conflicting relationship there;
                        // the stored relative's ID must match the one declared in this file
                        if (sample.getRelationship(relationship) != null) {
                            if (!relativeId.equals(sample.getRelationship(relationship).getId())) {
                                throw new StingException(relationship + " is a conflicting relationship!");
                            }
                            // the relationship is already set - and consistent with what we're reading now -
                            // so there is no need to continue
                            else {
                                continue;
                            }
                        }
                        // checks are passed - now save the relationship
                        saveRelationship(sample, relationship, relativeId);
                    }
                }
            } catch (Exception e) {
                throw new StingException("An error occurred while loading this sample from the sample file: " +
                        sampleParser.getId(), e);
            }
        }
    }

    /**
     * Checks whether a property value is permitted by the file's property definitions.
     * @param key property key
     * @param value value read from the YAML parser
     * @param valuesList map of property key => set of allowed string values
     * @return true if the value is allowed for this property
     */
    private boolean isValueAllowed(String key, Object value, HashMap<String, HashSet> valuesList) {
        // if the property values weren't specified for this property, then any value is okay
        if (!valuesList.containsKey(key)) {
            return true;
        }
        // a null value can never match an enumerated value
        else if (value == null)
            return false;
        // if this property has enumerated values, it must be a string
        else if (value.getClass() != String.class)
            return false;
        // is the value specified or not?
        else if (!valuesList.get(key).contains(value))
            return false;
        return true;
    }

    /**
     * Makes sure that the aliases are valid.
     * Checks that 1) no string is used as both a main ID and an alias;
     * 2) no alias is used more than once
     * @param parser parsed representation of the sample file
     */
    private void validateAliases(SampleFileParser parser) {
        // no aliases? nothing to validate
        if (parser.getSampleAliases() == null)
            return;

        HashSet<String> mainIds = new HashSet<String>();
        HashSet<String> otherIds = new HashSet<String>();

        for (SampleAlias sampleAlias : parser.getSampleAliases()) {
            mainIds.add(sampleAlias.getMainId());
            for (String otherId : sampleAlias.getOtherIds()) {
                if (mainIds.contains(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - the alias %s cannot " +
                            "be both a main ID and an other ID", otherId));
                // HashSet.add returns false when the element was already present
                if (!otherIds.add(otherId))
                    throw new StingException(String.format("The aliases in your sample file are invalid - %s is listed as an " +
                            "alias more than once.", otherId));
            }
        }
    }

    /**
     * Checks a property key against the special properties and the file's allowed list.
     * @param property property key to check
     * @param allowedProperties allowed property keys declared in the sample file
     * @return true if the property may be used
     */
    private boolean isPropertyValid(String property, String[] allowedProperties) {
        // is it a special property that is always allowed?
        for (String allowedProperty : specialProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        // is it in the allowed properties list?
        for (String allowedProperty : allowedProperties) {
            if (property.equals(allowedProperty))
                return true;
        }
        return false;
    }

    /**
     * Checks a relationship key against the special relationships and the file's allowed list.
     * @param relationship relationship key to check
     * @param allowedRelationships allowed relationship keys declared in the sample file
     * @return true if the relationship may be used
     */
    private boolean isRelationshipValid(String relationship, String[] allowedRelationships) {
        // is it a special relationship that is always allowed?
        for (String allowedRelationship : specialRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        // is it in the allowed relationships list?
        for (String allowedRelationship : allowedRelationships) {
            if (relationship.equals(allowedRelationship))
                return true;
        }
        return false;
    }

    /**
     * Saves a property on a sample, converting it to the correct type first.
     * @param sample sample to store the property on
     * @param key property key
     * @param value property value, as read from the YAML parser; may be null
     */
    private void saveProperty(Sample sample, String key, Object value) {
        // convert gender to the right type, if it was stored as a String;
        // the null check must come first to avoid an NPE on the String cast
        if (key.equals("gender") && value != null) {
            if (((String) value).toLowerCase().equals("male")) {
                value = Sample.Gender.MALE;
            }
            else if (((String) value).toLowerCase().equals("female")) {
                value = Sample.Gender.FEMALE;
            }
            else if (((String) value).toLowerCase().equals("unknown")) {
                value = Sample.Gender.UNKNOWN;
            }
            else {
                throw new StingException("'gender' property must be male, female, or unknown.");
            }
        }
        try {
            sample.setProperty(key, value);
        }
        catch (Exception e) {
            throw new StingException("Could not save property " + key, e);
        }
    }

    /**
     * Saves a relationship on a sample, storing a reference to the relative's Sample object.
     * Creates the relative's Sample object if it doesn't exist yet.
     * @param sample sample to store the relationship on
     * @param key relationship key
     * @param relativeId sample ID string of the relative
     */
    private void saveRelationship(Sample sample, String key, String relativeId) {
        // get the reference that we'll store as the value
        Sample relative = getSampleById(relativeId);

        // create a sample object for the relative, if necessary
        if (relative == null) {
            relative = new Sample(relativeId);
            addSample(relative);
        }

        sample.setRelationship(key, relative);
    }

    /**
     * Filter a sample name in case it is an alias.
     * @param sampleId to be filtered
     * @return ID of the sample that stores data for this alias
     */
    private String aliasFilter(String sampleId) {
        if (!sampleAliases.containsKey(sampleId))
            return sampleId;
        else
            return sampleAliases.get(sampleId);
    }

    /**
     * Add a sample to the collection, keyed by its ID.
     * @param sample to be added
     */
    private void addSample(Sample sample) {
        samples.put(sample.getId(), sample);
    }

    /**
     * Check if a sample with this ID exists.
     * Note that this will return true if the name passed in is an alias.
     * @param id ID of sample to be checked
     * @return true if sample exists; false if not
     */
    public boolean hasSample(String id) {
        return samples.containsKey(aliasFilter(id));
    }

    /**
     * Get a sample by its ID.
     * If an alias is passed in, returns the main sample object.
     * @param id sample ID or alias
     * @return sample object with this ID, or null if none exists
     */
    public Sample getSampleById(String id) {
        return samples.get(aliasFilter(id));
    }

    /**
     * Get the sample for a given read group.
     * Must first look up the ID for the read group.
     * @param readGroup of sample; may be null
     * @return sample object with the ID from the read group
     */
    public Sample getSampleByReadGroup(SAMReadGroupRecord readGroup) {
        // reads without a read group map onto the catch-all null sample
        if (readGroup == null)
            return getSampleById(null);
        String nameFromReadGroup = readGroup.getSample();
        return getSampleById(nameFromReadGroup);
    }

    /**
     * Get the sample for a given read.
     * Must first look up the read group, and then the sample ID for that read group.
     * @param read of sample
     * @return sample object of this read
     */
    public Sample getSampleByRead(SAMRecord read) {
        return getSampleByReadGroup(read.getReadGroup());
    }

    /**
     * Get the number of sample objects.
     * @return size of the samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /**
     * Return all samples with a given family ID.
     * Note that this isn't terribly efficient (linear) - it may be worth adding a family ID data structure for this.
     * @param familyId family ID to look up
     * @return set of family members; possibly empty
     */
    public Set<Sample> getFamily(String familyId) {
        HashSet<Sample> familyMembers = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.getFamilyId() != null) {
                if (sample.getFamilyId().equals(familyId))
                    familyMembers.add(sample);
            }
        }
        return familyMembers;
    }

    /**
     * Returns all children of a given sample.
     * See the note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient.
     * @param sample parent sample
     * @return set of children; possibly empty
     */
    public Set<Sample> getChildren(Sample sample) {
        HashSet<Sample> children = new HashSet<Sample>();
        // reference comparison is intentional: relatives are stored as references into the samples map
        for (Sample familyMember : getFamily(sample.getFamilyId())) {
            if (familyMember.getMother() == sample || familyMember.getFather() == sample) {
                children.add(familyMember);
            }
        }
        return children;
    }

    /**
     * @return a fresh set containing every sample in this data source
     */
    public Set<Sample> getSamples() {
        HashSet<Sample> set = new HashSet<Sample>();
        set.addAll(samples.values());
        return set;
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects.
     * Note that, since a set is returned, if you pass in a list with duplicate names there will not be any duplicates in the returned set.
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        HashSet<Sample> samples = new HashSet<Sample>();
        for (String name : sampleNameList) {
            try {
                samples.add(getSampleById(name));
            }
            catch (Exception e) {
                throw new StingException("Could not get sample with the following ID: " + name, e);
            }
        }
        return samples;
    }

    /**
     * Returns a set of samples that have any value (which could be null) for a given property.
     * @param key Property key
     * @return Set of samples with the property
     */
    public Set<Sample> getSamplesWithProperty(String key) {
        HashSet<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : samples.values()) {
            if (s.hasProperty(key))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Returns a set of samples that have a property with a certain value.
     * Value must be a string for now - could add a similar method for matching any objects in the future.
     *
     * @param key Property key
     * @param value String property value
     * @return Set of samples that match key and value
     */
    public Set<Sample> getSamplesWithProperty(String key, String value) {
        // build a new set rather than removing from the set being iterated,
        // which would throw ConcurrentModificationException
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample s : getSamplesWithProperty(key)) {
            if (value.equals(s.getProperty(key)))
                toReturn.add(s);
        }
        return toReturn;
    }

    /**
     * Gets the sample with the given ID, creating and registering it first if necessary.
     * @param id sample ID
     * @return existing or newly-created sample
     */
    public Sample getOrCreateSample(String id) {
        Sample sample = getSampleById(id);
        if (sample == null) {
            sample = new Sample(id);
            addSample(sample);
        }
        return sample;
    }

    /**
     * Returns all samples that were referenced in the SAM file.
     */
    public Set<Sample> getSAMFileSamples() {
        Set<Sample> toReturn = new HashSet<Sample>();
        for (Sample sample : samples.values()) {
            if (sample.hasSAMFileEntry())
                toReturn.add(sample);
        }
        return toReturn;
    }

    /**
     * Returns a set of sample objects for the sample names in a variant context.
     *
     * @param context Any variant context
     * @return a set of the sample objects
     */
    public Set<Sample> getSamplesByVariantContext(VariantContext context) {
        Set<Sample> samples = new HashSet<Sample>();
        for (String sampleName : context.getSampleNames()) {
            samples.add(getOrCreateSample(sampleName));
        }
        return samples;
    }

    /**
     * Return a subcontext restricted to samples with a given property key/value.
     * Gets the sample names from key/value and relies on VariantContext.subContextFromGenotypes for the filtering.
     * @param context VariantContext to filter
     * @param key property key
     * @param value property value (must be string)
     * @return subcontext
     */
    public VariantContext subContextFromSampleProperty(VariantContext context, String key, String value) {
        Set<String> samplesWithProperty = new HashSet<String>();
        for (String sampleName : context.getSampleNames()) {
            // use getSampleById so alias resolution is applied consistently with the rest of the class
            Sample s = getSampleById(sampleName);
            if (s != null && s.hasProperty(key) && s.getProperty(key).equals(value))
                samplesWithProperty.add(sampleName);
        }
        Map<String, Genotype> genotypes = context.getGenotypes(samplesWithProperty);
        return context.subContextFromGenotypes(genotypes.values());
    }

    /**
     * Convenience factory for a data source backed by an empty SAM header and no sample files.
     */
    public static SampleDataSource createEmptyDataSource() {
        SAMFileHeader header = new SAMFileHeader();
        return new SampleDataSource(header, null);
    }
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 12, 2010
* Time: 1:30:44 PM
*/
/**
 * Bean used by the YAML loader to hold the raw contents of a sample metadata file:
 * the list of samples plus the optional validation sections (allowed properties,
 * allowed relationships, property value definitions, and sample aliases).
 */
public class SampleFileParser {

    // raw sections of the sample file, populated reflectively by SnakeYAML via the setters below
    private SampleAlias[] sampleAliases;
    private String[] allowedProperties;
    private String[] allowedRelationships;
    private PropertyDefinition[] propertyDefinitions;
    private SampleParser[] samples;

    /** No-arg constructor required by the YAML bean loader. */
    public SampleFileParser() {
    }

    /** @return the parsed sample entries from the file */
    public SampleParser[] getSamples() {
        return samples;
    }

    public void setSamples(SampleParser[] samples) {
        this.samples = samples;
    }

    /** @return alias declarations mapping alternate IDs onto main sample IDs, or null if absent */
    public SampleAlias[] getSampleAliases() {
        return sampleAliases;
    }

    public void setSampleAliases(SampleAlias[] sampleAliases) {
        this.sampleAliases = sampleAliases;
    }

    /** @return the property keys this file permits, or null when properties are unrestricted */
    public String[] getAllowedProperties() {
        return allowedProperties;
    }

    public void setAllowedProperties(String[] allowedProperties) {
        this.allowedProperties = allowedProperties;
    }

    /** @return the relationship keys this file permits, or null when relationships are unrestricted */
    public String[] getAllowedRelationships() {
        return allowedRelationships;
    }

    public void setAllowedRelationships(String[] allowedRelationships) {
        this.allowedRelationships = allowedRelationships;
    }

    /** @return per-property definitions of allowed values, or null when values are unrestricted */
    public PropertyDefinition[] getPropertyDefinitions() {
        return propertyDefinitions;
    }

    public void setPropertyDefinitions(PropertyDefinition[] propertyDefinitions) {
        this.propertyDefinitions = propertyDefinitions;
    }
}

View File

@ -1,43 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import java.util.HashMap;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Aug 13, 2010
* Time: 2:09:43 PM
*/
/**
 * Bean used by the YAML loader to hold one raw sample entry from a sample metadata file:
 * its unique ID, an arbitrary key/value property map, and a map of relationship
 * names to the IDs of related samples.
 */
public class SampleParser {

    // sample's unique identifier
    private String id;
    // arbitrary property key => value pairs declared for the sample
    private HashMap<String, Object> properties;
    // relationship name (e.g. "mother") => related sample's ID
    private HashMap<String, String> relationships;

    /** @return this sample's unique ID */
    public String getId() {
        return id;
    }

    public void setId(String id) {
        this.id = id;
    }

    /** @return relationship name => related sample ID, or null if none were declared */
    public HashMap<String, String> getRelationships() {
        return relationships;
    }

    public void setRelationships(HashMap<String, String> relationships) {
        this.relationships = relationships;
    }

    /** @return property key => value map, or null if none were declared */
    public HashMap<String, Object> getProperties() {
        return properties;
    }

    public void setProperties(HashMap<String, Object> properties) {
        this.properties = properties;
    }
}

View File

@ -84,12 +84,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/
protected HierarchicalMicroScheduler(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse ) {
super(engine, walker, reads, reference, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
if (engine.getArguments().processingTrackerFile != null) {
throw new UserException.BadArgumentValue("-C", "Distributed GATK calculations currently not supported in multi-threaded mode. Complain to Mark depristo@broadinstitute.org to implement and test this code path");
}
}
public Object execute( Walker walker, ShardStrategy shardStrategy ) {

View File

@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.SampleUtils;
import java.util.Collection;
@ -56,7 +57,8 @@ public class LinearMicroScheduler extends MicroScheduler {
traversalEngine.startTimersIfNecessary();
if(shard.getShardType() == Shard.ShardType.LOCUS) {
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(), getReadIterator(shard), shard.getGenomeLocs(), engine.getSampleMetadata());
WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {
ShardDataProvider dataProvider = new LocusShardDataProvider(shard,iterator.getSourceInfo(),engine.getGenomeLocParser(),iterator.getLocus(),iterator,reference,rods);
Object result = traversalEngine.traverse(walker, dataProvider, accumulator.getReduceInit());

View File

@ -62,7 +62,10 @@ public class ShardTraverser implements Callable {
Object accumulator = walker.reduceInit();
LocusWalker lWalker = (LocusWalker)walker;
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),microScheduler.getReadIterator(shard),shard.getGenomeLocs(), microScheduler.engine.getSampleMetadata()); // todo: microScheduler.engine is protected - is it okay to user it here?
WindowMaker windowMaker = new WindowMaker(shard,microScheduler.getEngine().getGenomeLocParser(),
microScheduler.getReadIterator(shard),
shard.getGenomeLocs(),
microScheduler.engine.getSampleDB().getSampleNames()); // todo: microScheduler.engine is protected - is it okay to user it here?
ShardDataProvider dataProvider = null;
for(WindowMaker.WindowMakerIterator iterator: windowMaker) {

View File

@ -4,7 +4,6 @@ import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
@ -12,6 +11,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
@ -63,17 +63,20 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
* the given intervals.
* @param iterator The data source for this window.
* @param intervals The set of intervals over which to traverse.
* @param sampleData SampleDataSource that we can reference reads with
* @param sampleNames The complete set of sample names in the reads in shard
*/
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, SampleDataSource sampleData ) {
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
this.sourceInfo = shard.getReadProperties();
this.readIterator = iterator;
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleData));
this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
}
public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
public Iterator<WindowMakerIterator> iterator() {
return this;
}

View File

@ -35,12 +35,11 @@ import org.broadinstitute.sting.gatk.DownsampleType;
import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.ReservoirDownsampler;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.ExtendedEventPileupElement;
import org.broadinstitute.sting.utils.pileup.PileupElement;
@ -52,9 +51,6 @@ import java.util.*;
/** Iterator that traverses a SAM File, accumulating information on a per-locus basis */
public class LocusIteratorByState extends LocusIterator {
// private static long discarded_bases = 0L;
// private static long observed_bases = 0L;
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
@ -69,7 +65,7 @@ public class LocusIteratorByState extends LocusIterator {
* Used to create new GenomeLocs.
*/
private final GenomeLocParser genomeLocParser;
private final ArrayList<Sample> samples;
private final ArrayList<String> samples;
private final ReadStateManager readStates;
static private class SAMRecordState {
@ -278,15 +274,30 @@ public class LocusIteratorByState extends LocusIterator {
//
// -----------------------------------------------------------------------------------------------------------------
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, SampleDataSource sampleData ) {
public LocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples ) {
this.readInfo = readInformation;
this.genomeLocParser = genomeLocParser;
this.samples = new ArrayList<String>(samples);
this.readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// get the list of samples
this.samples = new ArrayList<Sample>(sampleData.getSamples());
readStates = new ReadStateManager(samIterator,readInformation.getDownsamplingMethod());
// currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
// there's no read data. So we need to throw this error only when samIterator.hasNext() is true
if ( this.samples.isEmpty() && samIterator.hasNext() ) {
// actually we cannot process BAMs without read groups unless we tolerate empty
// sample lists. In the empty case we need to add the null element to the samples
this.samples.add(null);
//throw new IllegalArgumentException("samples list must not be empty");
}
}
/**
* For testing only. Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
* for the system.
*/
public final static Collection<String> sampleListForSAMWithoutReadGroups() {
List<String> samples = new ArrayList<String>();
samples.add(null);
return samples;
}
public Iterator<AlignmentContext> iterator() {
@ -303,19 +314,6 @@ public class LocusIteratorByState extends LocusIterator {
//if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
}
public void printState() {
for(Sample sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
while(iterator.hasNext()) {
SAMRecordState state = iterator.next();
logger.debug(String.format("printState():"));
SAMRecord read = state.getRead();
int offset = state.getReadOffset();
logger.debug(String.format(" read: %s(%d)=%s, cigar=%s", read.getReadName(), offset, (char)read.getReadBases()[offset], read.getCigarString()));
}
}
}
private GenomeLoc getLocation() {
return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
}
@ -355,14 +353,14 @@ public class LocusIteratorByState extends LocusIterator {
// In this case, the subsequent call to next() will emit the normal pileup at the current base
// and shift the position.
if (readInfo.generateExtendedEvents() && hasExtendedEvents) {
Map<Sample,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<Sample,ReadBackedExtendedEventPileupImpl>();
Map<String,ReadBackedExtendedEventPileupImpl> fullExtendedEventPileup = new HashMap<String,ReadBackedExtendedEventPileupImpl>();
// get current location on the reference and decrement it by 1: the indels we just stepped over
// are associated with the *previous* reference base
GenomeLoc loc = genomeLocParser.incPos(getLocation(),-1);
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<ExtendedEventPileupElement> indelPile = new ArrayList<ExtendedEventPileupElement>(readStates.size(sample));
hasBeenSampled |= loc.getStart() <= readStates.getDownsamplingExtent(sample);
@ -426,10 +424,10 @@ public class LocusIteratorByState extends LocusIterator {
nextAlignmentContext = new AlignmentContext(loc, new ReadBackedExtendedEventPileupImpl(loc, fullExtendedEventPileup), hasBeenSampled);
} else {
GenomeLoc location = getLocation();
Map<Sample,ReadBackedPileupImpl> fullPileup = new HashMap<Sample,ReadBackedPileupImpl>();
Map<String,ReadBackedPileupImpl> fullPileup = new HashMap<String,ReadBackedPileupImpl>();
boolean hasBeenSampled = false;
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> iterator = readStates.iterator(sample);
List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
@ -495,7 +493,7 @@ public class LocusIteratorByState extends LocusIterator {
}
private void updateReadStates() {
for(Sample sample: samples) {
for(final String sample: samples) {
Iterator<SAMRecordState> it = readStates.iterator(sample);
while ( it.hasNext() ) {
SAMRecordState state = it.next();
@ -522,7 +520,7 @@ public class LocusIteratorByState extends LocusIterator {
private final PeekableIterator<SAMRecord> iterator;
private final DownsamplingMethod downsamplingMethod;
private final SamplePartitioner samplePartitioner;
private final Map<Sample,PerSampleReadStateManager> readStatesBySample = new HashMap<Sample,PerSampleReadStateManager>();
private final Map<String,PerSampleReadStateManager> readStatesBySample = new HashMap<String,PerSampleReadStateManager>();
private final int targetCoverage;
private int totalReadStates = 0;
@ -540,9 +538,9 @@ public class LocusIteratorByState extends LocusIterator {
}
Map<String,ReadSelector> readSelectors = new HashMap<String,ReadSelector>();
for(Sample sample: samples) {
for(final String sample: samples) {
readStatesBySample.put(sample,new PerSampleReadStateManager());
readSelectors.put(sample.getId(),downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
readSelectors.put(sample,downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null,targetCoverage) : new AllReadsSelector());
}
samplePartitioner = new SamplePartitioner(readSelectors);
@ -554,7 +552,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Iterator over the reads associated with that sample.
*/
public Iterator<SAMRecordState> iterator(final Sample sample) {
public Iterator<SAMRecordState> iterator(final String sample) {
return new Iterator<SAMRecordState>() {
private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
@ -590,7 +588,7 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample The sample.
* @return Total number of reads in the given sample.
*/
public int size(final Sample sample) {
public int size(final String sample) {
return readStatesBySample.get(sample).size();
}
@ -600,12 +598,12 @@ public class LocusIteratorByState extends LocusIterator {
* @param sample Sample, downsampled independently.
* @return Integer stop of the furthest undownsampled region.
*/
public int getDownsamplingExtent(final Sample sample) {
public int getDownsamplingExtent(final String sample) {
return readStatesBySample.get(sample).getDownsamplingExtent();
}
public SAMRecordState getFirst() {
for(Sample sample: samples) {
for(final String sample: samples) {
PerSampleReadStateManager reads = readStatesBySample.get(sample);
if(!reads.isEmpty())
return reads.peek();
@ -639,8 +637,8 @@ public class LocusIteratorByState extends LocusIterator {
}
samplePartitioner.complete();
for(Sample sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample.getId());
for(final String sample: samples) {
ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
@ -1072,6 +1070,3 @@ class SamplePartitioner implements ReadSelector {
}
}

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
* Categorical sample trait for association and analysis
*
* Samples can have unknown status, be affected or unaffected by the
* categorical trait, or they can be marked as actually having an
* other trait value (stored in an associated value in the Sample class)
*
* @author Mark DePristo
* @since Sept. 2011
*/
public enum Affection {
/** Status is unknown */
UNKNOWN,
/** Suffers from the disease */
AFFECTED,
/** Unaffected by the disease */
UNAFFECTED,
/** An "other" trait: value of the trait is stored elsewhere and is an arbitrary string */
OTHER
}

View File

@ -0,0 +1,34 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * ENUM of possible human genders: male, female, or unknown
 */
public enum Gender {
    /** Male (PED sex code 1) */
    MALE,
    /** Female (PED sex code 2) */
    FEMALE,
    /** Sex not provided or coded with any value other than 1 or 2 */
    UNKNOWN
}

View File

@ -0,0 +1,310 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.text.XReadLines;
import java.io.*;
import java.util.*;
/**
* Reads PED file-formatted tabular text files
*
* See http://www.broadinstitute.org/mpg/tagger/faq.html
* See http://pngu.mgh.harvard.edu/~purcell/plink/data.shtml#ped
*
* The "ped" file format refers to the widely-used format for linkage pedigree data.
* Each line describes a single (diploid) individual in the following format:
*
* family_ID individual_ID father_ID mother_ID gender phenotype genotype_1 genotype_2 ...
*
* If your data lacks pedigree information (for example, unrelated case/control individuals),
* set the father_ID and mother_ID to 0. sex denotes the individual's gender with 1=male and 2=female.
* phenotype refers to the affected status (for association studies) where 0=unknown, 1=unaffected, 2=affected.
* Finally, each genotype is written as two (=diploid) integer numbers (separated by whitespace),
* where 1=A, 2=C, 3=G, 4=T. No header lines are allowed and all columns must be separated by whitespace.
* Check out the information at the PLINK website on the "ped" file format.
*
* The PED file is a white-space (space or tab) delimited file: the first six columns are mandatory:
* Family ID
* Individual ID
* Paternal ID
* Maternal ID
* Sex (1=male; 2=female; other=unknown)
* Phenotype
*
* The IDs are alphanumeric: the combination of family and individual ID should uniquely identify a person.
* A PED file must have 1 and only 1 phenotype in the sixth column. The phenotype can be either a
* quantitative trait or an affection status column: PLINK will automatically detect which type
* (i.e. based on whether a value other than 0, 1, 2 or the missing genotype code is observed).
* Note that the GATK actually supports arbitrary values for quantitative trait -- not just doubles --
* and are actually representing these values as strings instead of doubles
*
* NOTE Quantitative traits with decimal points must be coded with a period/full-stop character and
* not a comma, i.e. 2.394 not 2,394
*
* If an individual's sex is unknown, then any character other than 1 or 2 can be used.
* When new files are created (PED, FAM, or other which contain sex) then the original coding will be
* preserved. However, these individuals will be dropped from any analyses (i.e. phenotype set to missing also)
* and an error message will arise if an analysis that uses family information is requested and an
* individual of 'unknown' sex is specified as a father or mother.
*
*
* HINT You can add a comment to a PED or MAP file by starting the line with a # character. The rest of that
* line will be ignored. Do not start any family IDs with this character therefore.
*
* Affection status, by default, should be coded:
* -9 missing
* 0 missing
* 1 unaffected
* 2 affected
*
* If your file is coded 0/1 to represent unaffected/affected, then use the --1 flag:
* plink --file mydata --1 which will specify a disease phenotype coded:
*
* -9 missing
* 0 unaffected
* 1 affected
*
* The missing phenotype value for quantitative traits is, by default, -9 (this can also be used for
* disease traits as well as 0). It can be reset by including the --missing-phenotype option:
*
* Genotypes (column 7 onwards) should also be white-space delimited; they can be any character
* (e.g. 1,2,3,4 or A,C,G,T or anything else) except 0 which is, by default, the missing genotype
* character. All markers should be biallelic. All SNPs (whether haploid or not) must have two
* alleles specified. Either Both alleles should be missing (i.e. 0) or neither.
*
* No header row should be given. For example, here are two individuals typed for 3 SNPs (one row = one person):
*
* FAM001 1 0 0 1 2 A A G G A C
* FAM001 2 0 0 1 2 A A A G 0 0
* ...
*
* Note that the GATK does not support genotypes in a PED file.
*
* @author Mark DePristo
* @since 2011
*/
public class PedReader {
    private static Logger logger = Logger.getLogger(PedReader.class);

    /**
     * The set of phenotype strings that denote a categorical (affection-status) trait.
     * Any other observed phenotype value switches the whole file into quantitative-trait mode.
     * (Renamed from the misspelled CATAGORICAL_TRAIT_VALUES; the field is private, so no
     * external callers are affected.)
     */
    final static private Set<String> CATEGORICAL_TRAIT_VALUES = new HashSet<String>(Arrays.asList("-9", "0", "1", "2"));
    /** Lines beginning with this marker are treated as comments and skipped */
    final static private String commentMarker = "#";

    /**
     * An enum that specifies which, if any, of the standard PED fields are
     * missing from the input records.  For example, suppose we have the full record:
     *
     *   "fam1 kid dad mom 1 2"
     *
     * indicating a male affected child.  This can be parsed with the -ped x.ped argument
     * to the GATK.  Suppose we only have:
     *
     *   "fam1 kid 1"
     *
     * we can parse the reduced version of this record with -ped:NO_PARENTS,NO_PHENOTYPE x.ped
     */
    public enum MissingPedField {
        /**
         * The PED records do not have the first (FAMILY_ID) argument.  The family id
         * will be set to null / empty.
         */
        NO_FAMILY_ID,

        /**
         * The PED records do not have either the paternal or maternal IDs, so
         * the corresponding IDs are set to null.
         */
        NO_PARENTS,

        /**
         * The PED records do not have the GENDER field, so the sex of each
         * sample will be set to UNKNOWN.
         */
        NO_SEX,

        /**
         * The PED records do not have the PHENOTYPE field, so the phenotype
         * of each sample will be set to UNKNOWN.
         */
        NO_PHENOTYPE
    }

    /** The six standard PED columns, in order */
    protected enum Field {
        FAMILY_ID, INDIVIDUAL_ID, PATERNAL_ID, MATERNAL_ID, GENDER, PHENOTYPE
    }

    // phenotype codes (see class javadoc / PLINK documentation)
    private final static String MISSING_VALUE1 = "-9";
    private final static String MISSING_VALUE2 = "0";
    private final static String PHENOTYPE_UNAFFECTED = "1";
    private final static String PHENOTYPE_AFFECTED = "2";

    // sex codes: 1=male, 2=female, anything else=unknown
    private final static String SEX_MALE = "1";
    private final static String SEX_FEMALE = "2";

    public PedReader() { }

    /**
     * Parses a PED file on disk, adding each parsed record to sampleDB.
     *
     * @param source        the PED file to read
     * @param missingFields which standard PED columns are absent from this file
     * @param sampleDB      database to register parsed (and implied parent) samples in
     * @return the list of samples parsed or implicitly created, in file order
     * @throws FileNotFoundException if source cannot be opened
     */
    public final List<Sample> parse(File source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) throws FileNotFoundException {
        logger.info("Reading PED file " + source + " with missing fields: " + missingFields);
        return parse(new FileReader(source), missingFields, sampleDB);
    }

    /**
     * Parses PED records supplied directly as a string, with ';' standing in for newlines.
     *
     * @param source        PED records, ';'-separated
     * @param missingFields which standard PED columns are absent
     * @param sampleDB      database to register parsed samples in
     * @return the list of samples parsed or implicitly created
     */
    public final List<Sample> parse(final String source, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        logger.warn("Reading PED string: \"" + source + "\" with missing fields: " + missingFields);
        // each ';' marks a record boundary, so expand it to a platform newline before parsing
        return parse(new StringReader(source.replace(";", String.format("%n"))), missingFields, sampleDB);
    }

    /**
     * Core parser: reads whitespace-delimited PED records from reader.
     *
     * Makes two passes over the lines: the first determines whether the phenotype column is
     * quantitative (any value outside {-9, 0, 1, 2}) or categorical, the second builds the
     * Sample objects.  Parents referenced but not defined in the file are created implicitly.
     *
     * @param reader        source of PED lines
     * @param missingFields which standard PED columns are absent
     * @param sampleDB      database to register parsed samples in
     * @return the list of samples parsed or implicitly created
     */
    public final List<Sample> parse(Reader reader, EnumSet<MissingPedField> missingFields, SampleDB sampleDB) {
        final List<String> lines = new XReadLines(reader).readLines();

        // What are the record offsets?  Each present column shifts the later ones right.
        final int familyPos = missingFields.contains(MissingPedField.NO_FAMILY_ID) ? -1 : 0;
        final int samplePos = familyPos + 1;
        final int paternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : samplePos + 1;
        final int maternalPos = missingFields.contains(MissingPedField.NO_PARENTS) ? -1 : paternalPos + 1;
        final int sexPos = missingFields.contains(MissingPedField.NO_SEX) ? -1 : Math.max(maternalPos, samplePos) + 1;
        final int phenotypePos = missingFields.contains(MissingPedField.NO_PHENOTYPE) ? -1 : Math.max(sexPos, Math.max(maternalPos, samplePos)) + 1;
        final int nExpectedFields = MathUtils.arrayMaxInt(Arrays.asList(samplePos, paternalPos, maternalPos, sexPos, phenotypePos)) + 1;

        // go through once and determine properties
        // NOTE(review): lineNo counts only data lines, so reported line numbers drift past
        // comments/blank lines — kept as-is to preserve existing messages
        int lineNo = 1;
        boolean isQT = false;
        final List<String[]> splits = new ArrayList<String[]>(lines.size());
        for ( final String line : lines ) {
            if ( line.startsWith(commentMarker)) continue;
            if ( line.trim().equals("") ) continue;

            final String[] parts = line.split("\\s+");

            if ( parts.length != nExpectedFields )
                // NOTE(review): reader.toString() is not a file name; message quality only
                throw new UserException.MalformedFile(reader.toString(), "Bad PED line " + lineNo + ": wrong number of fields");

            if ( phenotypePos != -1 ) {
                // one non-categorical value anywhere flips the whole file to quantitative-trait mode
                isQT = isQT || ! CATEGORICAL_TRAIT_VALUES.contains(parts[phenotypePos]);
            }

            splits.add(parts);
            lineNo++;
        }
        logger.info("Phenotype is other? " + isQT);

        // now go through and parse each record
        lineNo = 1;
        final List<Sample> samples = new ArrayList<Sample>(splits.size());
        for ( final String[] parts : splits ) {
            String familyID = null, individualID, paternalID = null, maternalID = null;
            Gender sex = Gender.UNKNOWN;
            String quantitativePhenotype = Sample.UNSET_QT;
            Affection affection = Affection.UNKNOWN;

            if ( familyPos != -1 ) familyID = maybeMissing(parts[familyPos]);
            individualID = parts[samplePos];
            if ( paternalPos != -1 ) paternalID = maybeMissing(parts[paternalPos]);
            if ( maternalPos != -1 ) maternalID = maybeMissing(parts[maternalPos]);

            if ( sexPos != -1 ) {
                if ( parts[sexPos].equals(SEX_MALE) ) sex = Gender.MALE;
                else if ( parts[sexPos].equals(SEX_FEMALE) ) sex = Gender.FEMALE;
                else sex = Gender.UNKNOWN;
            }

            if ( phenotypePos != -1 ) {
                if ( isQT ) {
                    // quantitative trait: -9 is the only missing marker; everything else is a value
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) )
                        affection = Affection.UNKNOWN;
                    else {
                        affection = Affection.OTHER;
                        quantitativePhenotype = parts[phenotypePos];
                    }
                } else {
                    if ( parts[phenotypePos].equals(MISSING_VALUE1) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(MISSING_VALUE2) ) affection = Affection.UNKNOWN;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_UNAFFECTED) ) affection = Affection.UNAFFECTED;
                    else if ( parts[phenotypePos].equals(PHENOTYPE_AFFECTED) ) affection = Affection.AFFECTED;
                    else throw new ReviewedStingException("Unexpected phenotype type " + parts[phenotypePos] + " at line " + lineNo);
                }
            }

            final Sample s = new Sample(individualID, sampleDB, familyID, paternalID, maternalID, sex, affection, quantitativePhenotype);
            samples.add(s);
            sampleDB.addSample(s);
            lineNo++;
        }

        // create implicit Sample records for parents that were referenced but never defined,
        // iterating over a snapshot so adding to samples is safe
        for ( final Sample sample : new ArrayList<Sample>(samples) ) {
            Sample dad = maybeAddImplicitSample(sampleDB, sample.getPaternalID(), sample.getFamilyID(), Gender.MALE);
            if ( dad != null ) samples.add(dad);

            Sample mom = maybeAddImplicitSample(sampleDB, sample.getMaternalID(), sample.getFamilyID(), Gender.FEMALE);
            if ( mom != null ) samples.add(mom);
        }

        return samples;
    }

    /**
     * Maps the PED missing-value codes ("-9" and "0") to null; any other string is a real ID.
     */
    private final static String maybeMissing(final String string) {
        if ( string.equals(MISSING_VALUE1) || string.equals(MISSING_VALUE2) )
            return null;
        else
            return string;
    }

    /**
     * If id names a sample not yet present in sampleDB, creates and registers a bare
     * Sample of the given gender/family for it.
     *
     * @return the newly created sample, or null if id was null or already known
     */
    private final Sample maybeAddImplicitSample(SampleDB sampleDB, final String id, final String familyID, final Gender gender) {
        if ( id != null && sampleDB.getSample(id) == null ) {
            Sample s = new Sample(id, sampleDB, familyID, null, null, gender, Affection.UNKNOWN, Sample.UNSET_QT);
            sampleDB.addSample(s);
            return s;
        } else
            return null;
    }

    /**
     * Parses a list of tags from the command line, assuming it comes from the GATK Engine
     * tags, and returns the corresponding EnumSet.
     *
     * @param arg the actual engine arg, used for the UserException if there's an error
     * @param tags a list of string tags that should be converted to the MissingPedField value
     * @return the set of MissingPedField values named by tags
     */
    public static final EnumSet<MissingPedField> parseMissingFieldTags(final Object arg, final List<String> tags) {
        final EnumSet<MissingPedField> missingFields = EnumSet.noneOf(MissingPedField.class);

        for ( final String tag : tags ) {
            try {
                missingFields.add(MissingPedField.valueOf(tag));
            } catch ( IllegalArgumentException e ) {
                // bug fix: MissingPedField.values() alone stringifies to the array's identity
                // hash (e.g. "[Lorg...;@1a2b3c"); Arrays.toString renders the actual names
                throw new UserException.BadArgumentValue(arg.toString(), "Unknown tag " + tag + " allowed values are " + Arrays.toString(MissingPedField.values()));
            }
        }

        return missingFields;
    }
}

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
/**
 * How strictly the engine should cross-check pedigree data against the
 * samples present in the input VCF/BAM files.
 */
public enum PedigreeValidationType {
    /**
     * If a pedigree file is provided, require that all samples in the VCF or BAM files
     * have a corresponding entry in the pedigree file(s).
     */
    STRICT,

    /**
     * Do not enforce any overlap between the VCF/BAM samples and the pedigree data
     * */
    SILENT
}

View File

@ -0,0 +1,222 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.HashMap;
import java.util.Map;
/**
 * An immutable record describing a single individual: its ID, optional pedigree links
 * (family / father / mother IDs), gender, affection status, and an optional free-form
 * "other" phenotype value.  Parent lookups are resolved through the owning SampleDB.
 */
public class Sample implements Comparable<Sample> { // implements java.io.Serializable {
    final private String familyID, paternalID, maternalID;
    final private Gender gender;
    // arbitrary string phenotype, only meaningful when affection == Affection.OTHER
    final private String otherPhenotype;
    final private Affection affection;
    final private String ID;
    // the database this sample belongs to; used to resolve parent IDs to Sample objects.
    // May be null for some constructors -- getMother()/getFather() then NPE; TODO confirm intended
    final private SampleDB infoDB;
    // reserved for future non-standard per-sample properties; currently only
    // participates in toString() and equals()
    final private Map<String, Object> properties = new HashMap<String, Object>();

    /** Sentinel (null) meaning "no quantitative/other phenotype set" */
    public final static String UNSET_QT = null;

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID,
                  final Gender gender, final Affection affection, final String otherPhenotype) {
        this.familyID = familyID;
        this.paternalID = paternalID;
        this.maternalID = maternalID;
        this.gender = gender;
        this.otherPhenotype = otherPhenotype;
        this.affection = affection;
        this.ID = ID;
        this.infoDB = infoDB;
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection, final String otherPhenotype) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, otherPhenotype);
    }

    protected Sample(final String ID,
                     final String familyID, final String paternalID, final String maternalID,
                     final Gender gender, final Affection affection) {
        this(ID, null, familyID, paternalID, maternalID, gender, affection, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB,
                  final String familyID, final String paternalID, final String maternalID, final Gender gender) {
        this(ID, infoDB, familyID, paternalID, maternalID, gender, Affection.UNKNOWN, UNSET_QT);
    }

    public Sample(final String ID, final SampleDB infoDB, final Affection affection, final String otherPhenotype) {
        this(ID, infoDB, null, null, null, Gender.UNKNOWN, affection, otherPhenotype);
    }

    public Sample(String id, SampleDB infoDB) {
        this(id, infoDB, null, null, null,
                Gender.UNKNOWN, Affection.UNKNOWN, UNSET_QT);
    }

    // -------------------------------------------------------------------------------------
    //
    // standard property getters
    //
    // -------------------------------------------------------------------------------------

    /** @return this sample's unique ID (never null) */
    public String getID() {
        return ID;
    }

    /** @return the family ID, or null if unknown */
    public String getFamilyID() {
        return familyID;
    }

    /** @return the father's sample ID, or null if unknown */
    public String getPaternalID() {
        return paternalID;
    }

    /** @return the mother's sample ID, or null if unknown */
    public String getMaternalID() {
        return maternalID;
    }

    /** @return the categorical affection status (never null) */
    public Affection getAffection() {
        return affection;
    }

    /** @return true when this sample carries an "other" (quantitative/free-form) phenotype */
    public boolean hasOtherPhenotype() {
        return affection == Affection.OTHER;
    }

    /** @return the free-form phenotype string, or UNSET_QT (null) when none was set */
    public String getOtherPhenotype() {
        return otherPhenotype;
    }

    /**
     * Get the sample's mother
     * @return sample object with relationship mother, if exists, or null
     */
    public Sample getMother() {
        return infoDB.getSample(maternalID);
    }

    /**
     * Get the sample's father
     * @return sample object with relationship father, if exists, or null
     */
    public Sample getFather() {
        return infoDB.getSample(paternalID);
    }

    /**
     * Get gender of the sample
     * @return property of key "gender" - must be of type Gender
     */
    public Gender getGender() {
        return gender;
    }

    /** Orders samples lexicographically by ID, consistent with equals only on the ID component */
    @Override
    public int compareTo(final Sample sample) {
        return ID.compareTo(sample.getID());
    }

    @Override
    public String toString() {
        return String.format("Sample %s fam=%s dad=%s mom=%s gender=%s affection=%s qt=%s props=%s",
                getID(), getFamilyID(), getPaternalID(), getMaternalID(), getGender(), getAffection(),
                getOtherPhenotype(), properties);
    }

    /** Hashes only the ID, matching the map-by-ID usage in SampleDB */
    @Override
    public int hashCode() {
        return ID.hashCode();
    }

    @Override
    public boolean equals(final Object o) {
        if(o == null)
            return false;
        if(o instanceof Sample) {
            Sample otherSample = (Sample)o;
            return ID.equals(otherSample.ID) &&
                    equalOrNull(familyID, otherSample.familyID) &&
                    equalOrNull(paternalID, otherSample.paternalID) &&
                    equalOrNull(maternalID, otherSample.maternalID) &&
                    equalOrNull(gender, otherSample.gender) &&
                    equalOrNull(otherPhenotype, otherSample.otherPhenotype) &&
                    equalOrNull(affection, otherSample.affection) &&
                    equalOrNull(properties, otherSample.properties);
        }
        return false;
    }

    /** Null-tolerant equality: two nulls are equal, null never equals a non-null */
    private final static boolean equalOrNull(final Object o1, final Object o2) {
        if ( o1 == null )
            return o2 == null;
        else
            return o2 == null ? false : o1.equals(o2);
    }

    /**
     * Merges one field of two Sample records.  A null / empty value yields to a real one;
     * two real values must agree, otherwise the records are inconsistent and we abort.
     *
     * @throws UserException if o1 and o2 both carry real values that differ
     */
    private final static <T> T mergeValues(final String name, final String field, final T o1, final T o2, final T emptyValue) {
        if ( o1 == null || o1.equals(emptyValue) ) {
            // o1 carries no information, so o2 wins (possibly null as well)
            return o2;
        } else {
            if ( o2 == null || o2.equals(emptyValue) )
                return o1; // keep o1, since it's a real value
            else {
                // both o1 and o2 have a real value; they must agree BY VALUE.
                // bug fix: this used identity (o1 == o2), which wrongly rejected equal
                // but distinct objects (e.g. equal family-ID strings from two parsed lines)
                if ( o1.equals(o2) )
                    return o1;
                else
                    throw new UserException("Inconsistent values detected for " + name + " for field " + field + " value1 " + o1 + " value2 " + o2);
            }
        }
    }

    /**
     * Field-wise merge of two records for the same individual (e.g. one from a BAM header,
     * one from a PED file).  Returns next unchanged when the two are fully equal.
     *
     * @throws UserException if any field holds two conflicting real values
     */
    public final static Sample mergeSamples(final Sample prev, final Sample next) {
        if ( prev.equals(next) )
            return next;
        else {
            return new Sample(prev.getID(), prev.infoDB,
                    mergeValues(prev.getID(), "Family_ID", prev.getFamilyID(), next.getFamilyID(), null),
                    mergeValues(prev.getID(), "Paternal_ID", prev.getPaternalID(), next.getPaternalID(), null),
                    // bug fix: error-message label was misspelled "Material_ID"
                    mergeValues(prev.getID(), "Maternal_ID", prev.getMaternalID(), next.getMaternalID(), null),
                    mergeValues(prev.getID(), "Gender", prev.getGender(), next.getGender(), Gender.UNKNOWN),
                    mergeValues(prev.getID(), "Affection", prev.getAffection(), next.getAffection(), Affection.UNKNOWN),
                    mergeValues(prev.getID(), "OtherPhenotype", prev.getOtherPhenotype(), next.getOtherPhenotype(), UNSET_QT));
        }
    }
}

View File

@ -0,0 +1,183 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import java.util.*;
/**
 * In-memory database of Sample records, keyed by their unique sample ID, with
 * helpers for looking samples up from SAM records/read groups/genotypes and for
 * simple pedigree queries (families, children).
 */
public class SampleDB {
    /**
     * This is where Sample objects are stored. Samples are usually accessed by their ID, which is unique, so
     * this is stored as a HashMap.
     */
    private final HashMap<String, Sample> samples = new HashMap<String, Sample>();

    /**
     * Constructor takes both a SAM header and sample files because the two must be integrated.
     */
    public SampleDB() {
    }

    /**
     * Protected function to add a single sample to the database.  If a sample with the
     * same ID is already present the two records are merged field-by-field (which may
     * throw if they conflict) and the merged record replaces the old one.
     *
     * @param sample to be added
     * @return this, for chaining
     */
    protected SampleDB addSample(Sample sample) {
        Sample prev = samples.get(sample.getID());
        if ( prev != null )
            sample = Sample.mergeSamples(prev, sample);
        samples.put(sample.getID(), sample);
        return this;
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for getting a sample from the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get a sample by its ID
     * If an alias is passed in, return the main sample object
     * @param id sample ID (may be null, in which case null is returned)
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(String id) {
        return samples.get(id);
    }

    /**
     * Look up the sample associated with a read's read group.
     *
     * @param read the read to resolve
     * @return sample Object with this ID, or null if the read has no read group
     *         or the sample does not exist
     */
    public Sample getSample(final SAMRecord read) {
        // robustness fix: reads without a read group previously caused an NPE here
        final SAMReadGroupRecord rg = read.getReadGroup();
        return rg == null ? null : getSample(rg);
    }

    /**
     * Look up the sample named by a read group's SM tag.
     *
     * @param rg the read group
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final SAMReadGroupRecord rg) {
        return getSample(rg.getSample());
    }

    /**
     * @param g Genotype
     * @return sample Object with this ID, or null if this does not exist
     */
    public Sample getSample(final Genotype g) {
        return getSample(g.getSampleName());
    }

    // --------------------------------------------------------------------------------
    //
    // Functions for accessing samples in the DB
    //
    // --------------------------------------------------------------------------------

    /**
     * Get number of sample objects
     * @return size of samples map
     */
    public int sampleCount() {
        return samples.size();
    }

    /** @return a fresh mutable set containing all samples in the DB */
    public Set<Sample> getSamples() {
        return new HashSet<Sample>(samples.values());
    }

    /** @return an unmodifiable view of all sample IDs in the DB */
    public Collection<String> getSampleNames() {
        return Collections.unmodifiableCollection(samples.keySet());
    }

    /**
     * Takes a collection of sample names and returns their corresponding sample objects
     * Note that, since a set is returned, if you pass in a list with duplicates names there will not be any duplicates in the returned set
     * @param sampleNameList Set of sample names
     * @return Corresponding set of samples
     */
    public Set<Sample> getSamples(Collection<String> sampleNameList) {
        final HashSet<Sample> result = new HashSet<Sample>();
        for (final String name : sampleNameList) {
            // note: getSample never throws; a previous dead try/catch was removed.
            // Unknown names contribute null, preserving the historical behavior.
            result.add(getSample(name));
        }
        return result;
    }

    // --------------------------------------------------------------------------------
    //
    // Higher level pedigree functions
    //
    // --------------------------------------------------------------------------------

    /**
     * Returns a sorted set of the family IDs in all samples (excluding null ids)
     * @return sorted set of family IDs
     */
    public final Set<String> getFamilyIDs() {
        return getFamilies().keySet();
    }

    /**
     * Returns a map from family ID -> set of family members for all samples with
     * non-null family ids
     *
     * @return map from family ID to its members, sorted by family ID
     */
    public final Map<String, Set<Sample>> getFamilies() {
        final Map<String, Set<Sample>> families = new TreeMap<String, Set<Sample>>();

        for ( final Sample sample : samples.values() ) {
            final String famID = sample.getFamilyID();
            if ( famID != null ) {
                if ( ! families.containsKey(famID) )
                    families.put(famID, new TreeSet<Sample>());
                families.get(famID).add(sample);
            }
        }

        return families;
    }

    /**
     * Return all samples with a given family ID
     * @param familyId the family to look up
     * @return members of the family, or null if no sample carries that family ID
     */
    public Set<Sample> getFamily(String familyId) {
        return getFamilies().get(familyId);
    }

    /**
     * Returns all children of a given sample
     * See note on the efficiency of getFamily() - since this depends on getFamily() it's also not efficient
     * @param sample the putative parent
     * @return the (possibly empty) set of samples whose mother or father is sample
     */
    public Set<Sample> getChildren(Sample sample) {
        final HashSet<Sample> children = new HashSet<Sample>();
        // robustness fix: a null/unknown family ID previously caused an NPE when
        // iterating over the (null) family set
        final Set<Sample> family = getFamily(sample.getFamilyID());
        if ( family == null )
            return children;
        for ( final Sample familyMember : family ) {
            // bug fix: parent matching used identity (==), which breaks once addSample()
            // replaces a sample instance via mergeSamples(); compare by value instead
            if ( sample.equals(familyMember.getMother()) || sample.equals(familyMember.getFather()) ) {
                children.add(familyMember);
            }
        }
        return children;
    }
}

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.*;
/**
*
*/
public class SampleDBBuilder {
PedigreeValidationType validationStrictness;
final SampleDB sampleDB = new SampleDB();
final GenomeAnalysisEngine engine;
Set<Sample> samplesFromDataSources = new HashSet<Sample>();
Set<Sample> samplesFromPedigrees = new HashSet<Sample>();
/** for testing only */
protected SampleDBBuilder(PedigreeValidationType validationStrictness) {
engine = null;
this.validationStrictness = validationStrictness;
}
/**
* Constructor takes both a SAM header and sample files because the two must be integrated.
*/
public SampleDBBuilder(GenomeAnalysisEngine engine, PedigreeValidationType validationStrictness) {
this.engine = engine;
this.validationStrictness = validationStrictness;
}
/**
* Hallucinates sample objects for all the samples in the SAM file and stores them
*/
public SampleDBBuilder addSamplesFromSAMHeader(final SAMFileHeader header) {
addSamplesFromSampleNames(SampleUtils.getSAMFileSamples(header));
return this;
}
public SampleDBBuilder addSamplesFromSampleNames(final Collection<String> sampleNames) {
for (final String sampleName : sampleNames) {
if (sampleDB.getSample(sampleName) == null) {
final Sample newSample = new Sample(sampleName, sampleDB);
sampleDB.addSample(newSample);
samplesFromDataSources.add(newSample); // keep track of data source samples
}
}
return this;
}
public SampleDBBuilder addSamplesFromPedigreeFiles(final List<File> pedigreeFiles) {
for (final File pedFile : pedigreeFiles) {
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedFile);
samplesFromPedigrees.addAll(samples);
}
return this;
}
public SampleDBBuilder addSamplesFromPedigreeStrings(final List<String> pedigreeStrings) {
for (final String pedString : pedigreeStrings) {
Collection<Sample> samples = addSamplesFromPedigreeArgument(pedString);
samplesFromPedigrees.addAll(samples);
}
return this;
}
/**
* Parse one sample file and integrate it with samples that are already there
* Fail quickly if we find any errors in the file
*/
private Collection<Sample> addSamplesFromPedigreeArgument(File sampleFile) {
final PedReader reader = new PedReader();
try {
return reader.parse(sampleFile, getMissingFields(sampleFile), sampleDB);
} catch ( FileNotFoundException e ) {
throw new UserException.CouldNotReadInputFile(sampleFile, e);
}
}
private Collection<Sample> addSamplesFromPedigreeArgument(final String string) {
final PedReader reader = new PedReader();
return reader.parse(string, getMissingFields(string), sampleDB);
}
/**
 * Runs validation (per the configured strictness) and then hands back
 * the completed sample database.
 *
 * @return the validated SampleDB
 */
public SampleDB getFinalSampleDB() {
    validate();
    return sampleDB;
}
/**
 * Determines which PED fields were declared missing for the given engine
 * argument, based on the argument's positional tags. When no engine is
 * available, no fields are considered missing.
 *
 * @param engineArg the command-line argument object whose tags are inspected
 * @return the (possibly empty) set of missing PED fields
 */
public EnumSet<PedReader.MissingPedField> getMissingFields(final Object engineArg) {
    if ( engine != null ) {
        final List<String> positionalTags = engine.getTags(engineArg).getPositionalTags();
        return PedReader.parseMissingFieldTags(engineArg, positionalTags);
    }
    return EnumSet.noneOf(PedReader.MissingPedField.class);
}
// --------------------------------------------------------------------------------
//
// Validation
//
// --------------------------------------------------------------------------------
/**
 * Verifies that every sample seen in the data sources is annotated in the
 * pedigrees. The check is skipped entirely under SILENT validation, and is
 * only meaningful when both sample sets are non-empty.
 *
 * @throws UserException if a data-source sample is absent from the pedigrees
 */
protected final void validate() {
    if ( validationStrictness == PedigreeValidationType.SILENT )
        return;

    // check that samples in data sources are all annotated, if anything is annotated
    if ( samplesFromPedigrees.isEmpty() || samplesFromDataSources.isEmpty() )
        return;

    final Set<String> pedigreeNames = new HashSet<String>();
    for ( final Sample pedSample : samplesFromPedigrees )
        pedigreeNames.add(pedSample.getID());

    for ( final Sample dataSample : samplesFromDataSources ) {
        if ( ! pedigreeNames.contains(dataSample.getID()) )
            throw new UserException("Sample " + dataSample.getID() + " found in data sources but not in pedigree files with STRICT pedigree validation");
    }
}
}

View File

@ -30,11 +30,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.CommandLineGATK;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.filters.MalformedReadFilter;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import org.broadinstitute.sting.utils.help.GenericDocumentationHandler;
import java.util.List;
@ -87,6 +88,14 @@ public abstract class Walker<MapType, ReduceType> {
return getToolkit().getMasterSequenceDictionary();
}
/**
 * @return the sample database held by the GATK engine
 */
protected SampleDB getSampleDB() {
    final SampleDB db = getToolkit().getSampleDB();
    return db;
}
/**
 * Looks up a sample by its ID in the engine's sample database.
 *
 * @param id the sample identifier
 * @return the matching Sample, or null when the ID is unknown
 */
protected Sample getSample(final String id) {
    final SampleDB db = getToolkit().getSampleDB();
    return db.getSample(id);
}
/**
* (conceptual static) method that states whether you want to see reads piling up at a locus
* that contain a deletion at the locus.

View File

@ -31,6 +31,7 @@ import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgume
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.walkers.RodWalker;
import org.broadinstitute.sting.gatk.walkers.variantrecalibration.VQSRCalibrationCurve;
import org.broadinstitute.sting.utils.GenomeLoc;
@ -247,7 +248,7 @@ public class ProduceBeagleInputWalker extends RodWalker<Integer, Integer> {
Map<String,Genotype> preferredGenotypes = preferredVC.getGenotypes();
Map<String,Genotype> otherGenotypes = goodSite(otherVC) ? otherVC.getGenotypes() : null;
for ( String sample : samples ) {
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getToolkit().getSampleById(sample).isMale();
boolean isMaleOnChrX = CHECK_IS_MALE_ON_CHR_X && getSample(sample).getGender() == Gender.MALE;
Genotype genotype;
boolean isValidation;

View File

@ -227,9 +227,8 @@ public class CallableLociWalker extends LocusWalker<CallableLociWalker.CallableB
@Override
public void initialize() {
if ( getToolkit().getSamples().size() != 2 ) {
// unbelievably there are actually two samples even when there's just one in the header. God I hate this Samples system
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getToolkit().getSamples());
if ( getSampleDB().getSamples().size() != 1 ) {
throw new UserException.BadArgumentValue("-I", "CallableLoci only works for a single sample, but multiple samples were found in the provided BAM files: " + getSampleDB().getSamples());
}
try {

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
@ -281,20 +282,14 @@ public class DepthOfCoverageWalker extends LocusWalker<Map<DoCOutputType.Partiti
private HashSet<String> getSamplesFromToolKit(DoCOutputType.Partition type) {
HashSet<String> partition = new HashSet<String>();
if ( type == DoCOutputType.Partition.sample ) {
for ( Set<String> sampleSet : getToolkit().getSamplesByReaders() ) {
for ( String s : sampleSet ) {
partition.add(s);
}
}
partition.addAll(SampleUtils.getSAMFileSamples(getToolkit()));
} else if ( type == DoCOutputType.Partition.readgroup ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getSample()+"_rg_"+rg.getReadGroupId());
}
} else if ( type == DoCOutputType.Partition.library ) {
for ( Set<String> libraries : getToolkit().getLibrariesByReaders() ) {
for ( String l : libraries ) {
partition.add(l);
}
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {
partition.add(rg.getLibrary());
}
} else if ( type == DoCOutputType.Partition.center ) {
for ( SAMReadGroupRecord rg : getToolkit().getSAMFileHeader().getReadGroups() ) {

View File

@ -248,6 +248,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
@Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
protected String N_WAY_OUT = null;
@Hidden
@Argument(fullName="generate_nWayOut_md5s",doc="Generate md5sums for BAMs")
protected boolean generateMD5s = false;
// DEBUGGING OPTIONS FOLLOW
@ -401,9 +404,9 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
// if ( args.containsKey("disable_bam_indexing") ) { System.out.println("NO INDEXING!!"); System.exit(1); createIndex = false; }
if ( N_WAY_OUT.toUpperCase().endsWith(".MAP") ) {
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex);
writerToUse = new NWaySAMFileWriter(getToolkit(),loadFileNameMap(N_WAY_OUT),SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
} else {
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex);
writerToUse = new NWaySAMFileWriter(getToolkit(),N_WAY_OUT,SAMFileHeader.SortOrder.coordinate,true, createIndex, generateMD5s);
}
} else {

View File

@ -32,16 +32,11 @@ import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.utils.Haplotype;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import java.io.File;
import java.io.FileWriter;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;

View File

@ -36,12 +36,8 @@ import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.filters.Platform454Filter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilter;
import org.broadinstitute.sting.gatk.filters.PlatformUnitFilterHelper;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.SeekableRODIterator;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrack;
import org.broadinstitute.sting.gatk.refdata.tracks.RMDTrackBuilder;
import org.broadinstitute.sting.gatk.refdata.utils.LocationAwareSeekableRODIterator;
@ -51,6 +47,9 @@ import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqCodec;
import org.broadinstitute.sting.utils.codecs.refseq.RefSeqFeature;
import org.broadinstitute.sting.utils.codecs.refseq.Transcript;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.collections.CircularArray;
import org.broadinstitute.sting.utils.collections.PrimitivePair;
@ -392,7 +391,7 @@ public class SomaticIndelDetectorWalker extends ReadWalker<Integer,Integer> {
location = getToolkit().getGenomeLocParser().createGenomeLoc(getToolkit().getSAMFileHeader().getSequence(0).getSequenceName(),1);
normalSamples = getToolkit().getSamplesByReaders().get(0);
normalSamples = SampleUtils.getSAMFileSamples(getToolkit().getSAMFileHeaders().get(0));
try {
// we already checked that bedOutput and output_file are not set simultaneously

View File

@ -30,7 +30,6 @@ import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.arguments.StandardVariantContextInputArgumentCollection;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.filters.MappingQualityZeroFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.*;
@ -1095,14 +1094,14 @@ public class ReadBackedPhasingWalker extends RodWalker<PhasingStatsAndOutput, Ph
// filter the read-base pileup based on min base and mapping qualities:
pileup = pileup.getBaseAndMappingFilteredPileup(MIN_BASE_QUALITY_SCORE, MIN_MAPPING_QUALITY_SCORE);
if (pileup != null) {
for (Sample sample : pileup.getSamples()) {
for (final String sample : pileup.getSamples()) {
ReadBackedPileup samplePileup = pileup.getPileupForSample(sample);
ReadBasesAtPosition readBases = new ReadBasesAtPosition();
for (PileupElement p : samplePileup) {
if (!p.isDeletion()) // IGNORE deletions for now
readBases.putReadBase(p);
}
sampleReadBases.put(sample.getId(), readBases);
sampleReadBases.put(sample, readBases);
}
}
}

View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.samples.Gender;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.gatk.walkers.DataSource;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Requires;
/**
 * Walks over the input data set, counting the reads whose sample is annotated
 * as male in the sample database. A minimal example of a read-backed analysis
 * that consults per-sample pedigree metadata.
 */
@Requires({DataSource.READS, DataSource.REFERENCE})
public class CountMalesWalker extends ReadWalker<Integer, Integer> {
    /** Emits 1 when the read's sample is male, 0 otherwise. */
    public Integer map(ReferenceContext ref, SAMRecord read, ReadMetaDataTracker tracker) {
        final Sample sample = getSampleDB().getSample(read);
        if ( sample.getGender() == Gender.MALE )
            return 1;
        return 0;
    }

    /** The running count starts at zero. */
    public Integer reduceInit() { return 0; }

    /** Folds each read's contribution into the running total. */
    public Integer reduce(Integer value, Integer sum) {
        return sum + value;
    }
}

View File

@ -130,6 +130,10 @@ public class CountVariants extends VariantEvaluator implements StandardEval {
nVariantLoci++;
nMixed++;
break;
case SYMBOLIC:
// ignore symbolic alleles, but don't fail
// todo - consistent way of treating symbolic alleles throughout codebase?
break;
default:
throw new ReviewedStingException("Unexpected VariantContext type " + vc1.getType());
}

View File

@ -133,7 +133,7 @@ public class VariantsToTable extends RodWalker<Integer, Integer> {
/**
* By default, this tool throws a UserException when it encounters a field without a value in some record. This
* is generally useful when you mistype -F CHRMO, so that you get a friendly warning about CHRMO not being
* is generally useful when you mistype -F CHROM, so that you get a friendly warning about CHRMO not being
* found before the tool runs through 40M 1000G records. However, in some cases you genuinely want to allow such
* fields (e.g., AC not being calculated for filtered records, if included). When provided, this argument
* will cause VariantsToTable to write out NA values for missing fields instead of throwing an error.

View File

@ -1,13 +1,10 @@
package org.broadinstitute.sting.utils;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.gatk.samples.Sample;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.regex.Matcher;
@ -19,9 +16,6 @@ import java.util.regex.Pattern;
* Time: 12:38 PM
*/
public class MendelianViolation {
String sampleMom;
String sampleDad;
String sampleChild;
@ -34,22 +28,15 @@ public class MendelianViolation {
private static Pattern FAMILY_PATTERN = Pattern.compile("(.*)\\+(.*)=(.*)");
static final int[] mvOffsets = new int[] { 1,2,5,6,8,11,15,18,20,21,24,25 };
static final int[] nonMVOffsets = new int[]{ 0,3,4,7,9,10,12,13,14,16,17,19,22,23,26 };
public String getSampleMom() {
return sampleMom;
}
public String getSampleDad() {
return sampleDad;
}
public String getSampleChild() {
return sampleChild;
}
public double getMinGenotypeQuality() {
return minGenotypeQuality;
}
@ -90,37 +77,12 @@ public class MendelianViolation {
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(Sample sample, double minGenotypeQualityP) {
sampleMom = sample.getMother().getId();
sampleDad = sample.getFather().getId();
sampleChild = sample.getId();
sampleMom = sample.getMother().getID();
sampleDad = sample.getFather().getID();
sampleChild = sample.getID();
minGenotypeQuality = minGenotypeQualityP;
}
/**
* The most common constructor to be used when give a YAML file with the relationships to the engine with the -SM option.
* @param engine - The GATK engine, use getToolkit(). That's where the sample information is stored.
* @param minGenotypeQualityP - the minimum phred scaled genotype quality score necessary to assess mendelian violation
*/
public MendelianViolation(GenomeAnalysisEngine engine, double minGenotypeQualityP) {
boolean gotSampleInformation = false;
Collection<Sample> samples = engine.getSamples();
// Iterate through all samples in the sample_metadata file but we really can only take one.
for (Sample sample : samples) {
if (sample.getMother() != null && sample.getFather() != null) {
sampleMom = sample.getMother().getId();
sampleDad = sample.getFather().getId();
sampleChild = sample.getId();
minGenotypeQuality = minGenotypeQualityP;
gotSampleInformation = true;
break; // we can only deal with one trio information
}
}
if (!gotSampleInformation)
throw new UserException("YAML file has no sample with relationship information (mother/father)");
}
/**
* This method prepares the object to evaluate for violation. Typically you won't call it directly, a call to
* isViolation(vc) will take care of this. But if you want to know whether your site was a valid comparison site
@ -158,7 +120,7 @@ public class MendelianViolation {
* @return False if we can't determine (lack of information), or it's not a violation. True if it is a violation.
*
*/
public boolean isViolation (VariantContext vc)
public boolean isViolation(VariantContext vc)
{
return setAlleles(vc) && isViolation();
}
@ -172,42 +134,4 @@ public class MendelianViolation {
return false;
return true;
}
/**
* @return the likelihood ratio for a mendelian violation
*/
public double violationLikelihoodRatio(VariantContext vc) {
double[] logLikAssignments = new double[27];
// the matrix to set up is
// MOM DAD CHILD
// |- AA
// AA AA | AB
// |- BB
// |- AA
// AA AB | AB
// |- BB
// etc. The leaves are counted as 0-11 for MVs and 0-14 for non-MVs
double[] momGL = vc.getGenotype(sampleMom).getLikelihoods().getAsVector();
double[] dadGL = vc.getGenotype(sampleDad).getLikelihoods().getAsVector();
double[] childGL = vc.getGenotype(sampleChild).getLikelihoods().getAsVector();
int offset = 0;
for ( int oMom = 0; oMom < 3; oMom++ ) {
for ( int oDad = 0; oDad < 3; oDad++ ) {
for ( int oChild = 0; oChild < 3; oChild ++ ) {
logLikAssignments[offset++] = momGL[oMom] + dadGL[oDad] + childGL[oChild];
}
}
}
double[] mvLiks = new double[12];
double[] nonMVLiks = new double[15];
for ( int i = 0; i < 12; i ++ ) {
mvLiks[i] = logLikAssignments[mvOffsets[i]];
}
for ( int i = 0; i < 15; i++) {
nonMVLiks[i] = logLikAssignments[nonMVOffsets[i]];
}
return MathUtils.log10sumLog10(mvLiks) - MathUtils.log10sumLog10(nonMVLiks);
}
}

View File

@ -69,6 +69,18 @@ public class SampleUtils {
return samples;
}
/**
 * Same as {@link #getSAMFileSamples(net.sf.samtools.SAMFileHeader)} but pulls the
 * sample names from the merged header of all SAM files loaded by the engine.
 *
 * @param engine the GATK engine whose SAM file header is inspected
 * @return the set of sample names present in the engine's SAM header
 */
public static final Set<String> getSAMFileSamples(GenomeAnalysisEngine engine) {
    return getSAMFileSamples(engine.getSAMFileHeader());
}
/**
* Gets all of the unique sample names from all VCF rods input by the user
*

View File

@ -4,7 +4,6 @@ import com.google.java.contract.Requires;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.ReadUtils;

View File

@ -215,6 +215,10 @@ public class UserException extends ReviewedStingException {
super(String.format("File %s is malformed: %s caused by %s", f.getAbsolutePath(), message, e.getMessage()));
}
/** Reports a malformed input identified by name rather than by a File object. */
public MalformedFile(String name, String message) {
super(String.format("File associated with name %s is malformed: %s", name, message));
}
/** Reports a malformed input identified by name, including the triggering exception's message. */
public MalformedFile(String name, String message, Exception e) {
super(String.format("File associated with name %s is malformed: %s caused by %s", name, message, e.getMessage()));
}

View File

@ -26,11 +26,9 @@ package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.util.*;
@ -114,10 +112,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
calculateCachedData();
}
protected AbstractReadBackedPileup(GenomeLoc loc, Map<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
protected AbstractReadBackedPileup(GenomeLoc loc, Map<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupsBySample) {
this.loc = loc;
PerSamplePileupElementTracker<PE> tracker = new PerSamplePileupElementTracker<PE>();
for(Map.Entry<Sample,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
for(Map.Entry<String,? extends AbstractReadBackedPileup<RBP,PE>> pileupEntry: pileupsBySample.entrySet()) {
tracker.addElements(pileupEntry.getKey(),pileupEntry.getValue().pileupElementTracker);
addPileupToCumulativeStats(pileupEntry.getValue());
}
@ -213,7 +211,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutDeletions();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -251,7 +249,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getOverlappingFragmentFilteredPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -305,7 +303,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupWithoutMappingQualityZeroReads();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -334,7 +332,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPositiveStrandPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -363,7 +361,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getNegativeStrandPileup();
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -393,7 +391,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getFilteredPileup(filter);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -425,7 +423,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getBaseAndMappingFilteredPileup(minBaseQ,minMapQ);
filteredTracker.addElements(sample,pileup.pileupElementTracker);
@ -492,7 +490,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForReadGroup(targetReadGroupId);
if(pileup != null)
@ -523,7 +521,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
AbstractReadBackedPileup<RBP,PE> pileup = createNewPileup(loc,perSampleElements).getPileupForLane(laneID);
if(pileup != null)
@ -550,14 +548,10 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
public Collection<String> getSampleNames() {
public Collection<String> getSamples() {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
Collection<String> sampleNames = new HashSet<String>();
for (Sample sample : tracker.getSamples()) {
sampleNames.add(sample.getId());
}
return sampleNames;
return new HashSet<String>(tracker.getSamples());
}
else {
Collection<String> sampleNames = new HashSet<String>();
@ -570,16 +564,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public Collection<Sample> getSamples() {
if(!(pileupElementTracker instanceof PerSamplePileupElementTracker)) {
throw new StingException("Must be an instance of PerSampleElementTracker");
}
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
return tracker.getSamples();
}
/**
* Returns a pileup randomly downsampled to the desiredCoverage.
*
@ -604,7 +588,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
int current = 0;
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
@ -639,7 +623,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
@Override
public RBP getPileupForSampleNames(Collection<String> sampleNames) {
public RBP getPileupForSamples(Collection<String> sampleNames) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleNames);
@ -665,7 +649,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
@Override
public RBP getPileupForSampleName(String sampleName) {
public RBP getPileupForSample(String sampleName) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sampleName);
@ -688,30 +672,6 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
}
}
@Override
public RBP getPileupForSample(Sample sample) {
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
PileupElementTracker<PE> filteredElements = tracker.getElements(sample);
return filteredElements != null ? (RBP)createNewPileup(loc,filteredElements) : null;
}
else {
UnifiedPileupElementTracker<PE> filteredTracker = new UnifiedPileupElementTracker<PE>();
for(PE p: pileupElementTracker) {
SAMRecord read = p.getRead();
if(sample != null) {
if(read.getReadGroup() != null && sample.getId().equals(read.getReadGroup().getSample()))
filteredTracker.add(p);
}
else {
if(read.getReadGroup() == null || read.getReadGroup().getSample() == null)
filteredTracker.add(p);
}
}
return filteredTracker.size()>0 ? (RBP)createNewPileup(loc,filteredTracker) : null;
}
}
// --------------------------------------------------------
//
// iterators
@ -801,7 +761,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
if(pileupElementTracker instanceof PerSamplePileupElementTracker) {
PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>)pileupElementTracker;
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
int[] countsBySample = createNewPileup(loc,tracker.getElements(sample)).getBaseCounts();
for(int i = 0; i < counts.length; i++)
counts[i] += countsBySample[i];

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.Comparator;
import java.util.Iterator;
@ -42,7 +41,7 @@ class MergingPileupElementIterator<PE extends PileupElement> implements Iterator
public MergingPileupElementIterator(PerSamplePileupElementTracker<PE> tracker) {
perSampleIterators = new PriorityQueue<PeekableIterator<PE>>(Math.max(1,tracker.getSamples().size()),new PileupElementIteratorComparator());
for(Sample sample: tracker.getSamples()) {
for(final String sample: tracker.getSamples()) {
PileupElementTracker<PE> trackerPerSample = tracker.getElements(sample);
if(trackerPerSample.size() != 0)
perSampleIterators.add(new PeekableIterator<PE>(trackerPerSample.iterator()));

View File

@ -24,8 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import java.util.*;
/**
@ -60,52 +58,35 @@ class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElemen
}
class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
private final Map<Sample,PileupElementTracker<PE>> pileup;
private final Map<String, Sample> sampleNames = new HashMap<String, Sample>();
private final Map<String,PileupElementTracker<PE>> pileup;
private int size = 0;
public PerSamplePileupElementTracker() {
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
}
public PerSamplePileupElementTracker(Map<Sample,AbstractReadBackedPileup<?,PE>> pileupsBySample) {
pileup = new HashMap<Sample,PileupElementTracker<PE>>();
for(Map.Entry<Sample,AbstractReadBackedPileup<?,PE>> entry: pileupsBySample.entrySet()) {
Sample sample = entry.getKey();
AbstractReadBackedPileup<?,PE> pileupBySample = entry.getValue();
pileup.put(sample,pileupBySample.pileupElementTracker);
sampleNames.put(sample.getId(), sample);
}
pileup = new HashMap<String,PileupElementTracker<PE>>();
}
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<Sample> getSamples() {
public Collection<String> getSamples() {
return pileup.keySet();
}
public PileupElementTracker<PE> getElements(final Sample sample) {
public PileupElementTracker<PE> getElements(final String sample) {
return pileup.get(sample);
}
public PileupElementTracker<PE> getElements(final String sampleName) {
return pileup.get(sampleNames.get(sampleName));
}
public PileupElementTracker<PE> getElements(final Collection<String> selectSampleNames) {
PerSamplePileupElementTracker<PE> result = new PerSamplePileupElementTracker<PE>();
for (String sample : selectSampleNames) {
Sample sampleObject = sampleNames.get(sample);
result.addElements(sampleObject, pileup.get(sampleObject));
for (final String sample : selectSampleNames) {
result.addElements(sample, pileup.get(sample));
}
return result;
}
public void addElements(final Sample sample, PileupElementTracker<PE> elements) {
public void addElements(final String sample, PileupElementTracker<PE> elements) {
pileup.put(sample,elements);
sampleNames.put(sample.getId(), sample);
size += elements.size();
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
@ -121,20 +120,7 @@ public interface ReadBackedExtendedEventPileup extends ReadBackedPileup {
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<String> getSampleNames();
/**
* Gets a list of all the samples stored in this pileup.
* @return List of samples in this pileup.
*/
public Collection<Sample> getSamples();
/**
* Gets the particular subset of this pileup with the given sample name.
* @param sample Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedExtendedEventPileup getPileupForSample(Sample sample);
public Collection<String> getSamples();
public Iterable<ExtendedEventPileupElement> toExtendedIterable();

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -56,7 +55,7 @@ public class ReadBackedExtendedEventPileupImpl extends AbstractReadBackedPileup<
}
// this is the good new one
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<Sample,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
public ReadBackedExtendedEventPileupImpl(GenomeLoc loc, Map<String,? extends ReadBackedExtendedEventPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.HasGenomeLocation;
@ -137,18 +136,11 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
*/
public ReadBackedPileup getPileupForLane(String laneID);
/**
* Gets a collection of all the samples stored in this pileup.
* @return Collection of samples in this pileup.
*/
public Collection<Sample> getSamples();
/**
* Gets a collection of *names* of all the samples stored in this pileup.
* @return Collection of names
*/
public Collection<String> getSampleNames();
public Collection<String> getSamples();
/**
@ -156,7 +148,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
* @param sampleNames Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSampleNames(Collection<String> sampleNames);
public ReadBackedPileup getPileupForSamples(Collection<String> sampleNames);
/**
@ -164,14 +156,7 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
* @param sampleName Name of the sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSampleName(String sampleName);
/**
* Gets the particular subset of this pileup with the given sample.
* @param sample Sample to use.
* @return A subset of this pileup containing only reads with the given sample.
*/
public ReadBackedPileup getPileupForSample(Sample sample);
public ReadBackedPileup getPileupForSample(String sampleName);
/**
* Simple useful routine to count the number of deletion bases in this pileup

View File

@ -24,7 +24,6 @@
package org.broadinstitute.sting.utils.pileup;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.GenomeLoc;
import java.util.List;
@ -48,7 +47,7 @@ public class ReadBackedPileupImpl extends AbstractReadBackedPileup<ReadBackedPil
super(loc,pileupElements);
}
public ReadBackedPileupImpl(GenomeLoc loc, Map<Sample,ReadBackedPileupImpl> pileupElementsBySample) {
public ReadBackedPileupImpl(GenomeLoc loc, Map<String,ReadBackedPileupImpl> pileupElementsBySample) {
super(loc,pileupElementsBySample);
}

View File

@ -51,18 +51,18 @@ public class NWaySAMFileWriter implements SAMFileWriter {
private boolean presorted ;
GenomeAnalysisEngine toolkit;
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
this.presorted = presorted;
this.toolkit = toolkit;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly);
setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5);
}
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly ) {
public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
this.presorted = presorted;
this.toolkit = toolkit;
writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
setupByReader(toolkit,ext,order, presorted, indexOnTheFly);
setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5);
}
@ -73,8 +73,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit
* @param in2out
*/
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
@ -88,7 +87,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
addWriter(rid,outName, order, presorted, indexOnTheFly);
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
}
}
@ -101,7 +100,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
* @param toolkit
* @param ext
*/
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
@ -119,16 +118,19 @@ public class NWaySAMFileWriter implements SAMFileWriter {
if ( writerMap.containsKey( rid ) )
throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
addWriter(rid,outName, order, presorted, indexOnTheFly);
addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5);
}
}
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly) {
private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
File f = new File(outName);
SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
header.setSortOrder(order);
SAMFileWriter sw = new SAMFileWriterFactory().setCreateIndex(indexOnTheFly).makeSAMOrBAMWriter(header, presorted, f);
SAMFileWriterFactory factory = new SAMFileWriterFactory();
factory.setCreateIndex(indexOnTheFly);
factory.setCreateMd5File(generateMD5);
SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
writerMap.put(id,sw);
}

View File

@ -99,9 +99,9 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
*
* @param reader
*/
public XReadLines(final BufferedReader reader, final boolean trimWhitespace) {
public XReadLines(final Reader reader, final boolean trimWhitespace) {
try {
this.in = reader;
this.in = new BufferedReader(reader);
nextline = readNextLine();
this.trimWhitespace = trimWhitespace;
} catch(IOException e) {
@ -109,7 +109,7 @@ public class XReadLines implements Iterator<String>, Iterable<String> {
}
}
public XReadLines(final BufferedReader reader) throws FileNotFoundException {
public XReadLines(final Reader reader) {
this(reader, true);
}

View File

@ -132,7 +132,7 @@ import java.util.*;
* vc.hasGenotypes()
* vc.isMonomorphic()
* vc.isPolymorphic()
* vc.getSampleNames().size()
* vc.getSamples().size()
*
* vc.getGenotypes()
* vc.getGenotypes().get("g1")

View File

@ -129,7 +129,7 @@ public class MD5DB {
System.out.printf("##### Skipping update, cannot write file %s%n", dbFile);
}
} else {
System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
//System.out.printf("##### MD5 file is up to date: %s%n", dbFile.getPath());
}
}
@ -170,6 +170,18 @@ public class MD5DB {
return bytes;
}
public static class MD5Match {
final String md5;
final String failMessage;
boolean failed;
public MD5Match(final String md5, final String failMessage, final boolean failed) {
this.md5 = md5;
this.failMessage = failMessage;
this.failed = failed;
}
}
/**
* Tests a file MD5 against an expected value, returning the MD5. NOTE: This function WILL throw an exception if the MD5s are different.
* @param name Name of the test.
@ -178,18 +190,21 @@ public class MD5DB {
* @param parameterize If true or if expectedMD5 is an empty string, will print out the calculated MD5 instead of error text.
* @return The calculated MD5.
*/
public static String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
public static MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5, final boolean parameterize) {
final String filemd5sum = testFileMD5(name, resultsFile, expectedMD5, parameterize);
String failMessage = null;
boolean failed = false;
if (parameterize || expectedMD5.equals("")) {
// Don't assert
} else if ( filemd5sum.equals(expectedMD5) ) {
System.out.println(String.format(" => %s PASSED", name));
System.out.println(String.format(" => %s PASSED (expected=%s)", name, expectedMD5));
} else {
Assert.fail(String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum));
failed = true;
failMessage = String.format("%s has mismatching MD5s: expected=%s observed=%s", name, expectedMD5, filemd5sum);
}
return filemd5sum;
return new MD5Match(filemd5sum, failMessage, failed);
}
@ -218,8 +233,8 @@ public class MD5DB {
System.out.println(String.format("PARAMETERIZATION[%s]: file %s has md5 = %s, stated expectation is %s, equal? = %b",
name, resultsFile, filemd5sum, expectedMD5, filemd5sum.equals(expectedMD5)));
} else {
System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
System.out.flush();
//System.out.println(String.format("Checking MD5 for %s [calculated=%s, expected=%s]", resultsFile, filemd5sum, expectedMD5));
//System.out.flush();
if ( ! expectedMD5.equals(filemd5sum) ) {
// we are going to fail for real in assertEquals (so we are counted by the testing framework).

View File

@ -52,7 +52,7 @@ public class WalkerTest extends BaseTest {
GenomeAnalysisEngine.resetRandomGenerator();
}
public String assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
public MD5DB.MD5Match assertMatchingMD5(final String name, final File resultsFile, final String expectedMD5) {
return MD5DB.assertMatchingMD5(name, resultsFile, expectedMD5, parameterize());
}
@ -84,10 +84,23 @@ public class WalkerTest extends BaseTest {
public List<String> assertMatchingMD5s(final String name, List<File> resultFiles, List<String> expectedMD5s) {
List<String> md5s = new ArrayList<String>();
List<MD5DB.MD5Match> fails = new ArrayList<MD5DB.MD5Match>();
for (int i = 0; i < resultFiles.size(); i++) {
String md5 = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
maybeValidateSupplementaryFile(name, resultFiles.get(i));
md5s.add(i, md5);
MD5DB.MD5Match result = assertMatchingMD5(name, resultFiles.get(i), expectedMD5s.get(i));
if ( ! result.failed ) {
maybeValidateSupplementaryFile(name, resultFiles.get(i));
md5s.add(result.md5);
} else {
fails.add(result);
}
}
if ( ! fails.isEmpty() ) {
for ( final MD5DB.MD5Match fail : fails ) {
logger.warn("Fail: " + fail.failMessage);
}
Assert.fail("Test failed: " + name);
}
return md5s;

View File

@ -8,11 +8,9 @@ import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.testng.annotations.BeforeClass;
@ -51,7 +49,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);
@ -67,7 +65,7 @@ public abstract class LocusViewTemplate extends BaseTest {
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(shardBounds));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
@ -82,7 +80,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -96,7 +94,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -110,7 +108,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -124,7 +122,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 5, 5)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -138,7 +136,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -152,7 +150,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -167,7 +165,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -186,7 +184,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -205,7 +203,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(),new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -226,7 +224,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator(read1, read2, read3, read4, read5, read6);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 1, 10)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);
@ -254,7 +252,7 @@ public abstract class LocusViewTemplate extends BaseTest {
read07, read08, read09, read10, read11, read12);
Shard shard = new MockLocusShard(genomeLocParser,Collections.singletonList(genomeLocParser.createGenomeLoc("chr1", 6, 15)));
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs(), new SampleDataSource());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, window.getSourceInfo(), genomeLocParser, window.getLocus(), window, null, null);
LocusView view = createView(dataProvider);

View File

@ -26,7 +26,6 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import com.google.caliper.Param;
import net.sf.picard.filter.FilteringIterator;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Tags;
@ -34,15 +33,12 @@ import org.broadinstitute.sting.gatk.DownsamplingMethod;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.baq.BAQ;
import java.io.File;
import java.util.Collections;
import java.util.Iterator;
@ -88,12 +84,9 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
(byte)0);
GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
SampleDataSource sampleDataSource = new SampleDataSource();
sampleDataSource.addSamplesFromSAMHeader(reader.getFileHeader());
// Filter unmapped reads. TODO: is this always strictly necessary? Who in the GATK normally filters these out?
Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser,sampleDataSource);
LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
while(locusIteratorByState.hasNext()) {
locusIteratorByState.next().getLocation();
}

View File

@ -1,241 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.Genotype;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.StingException;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleDataSourceUnitTest extends BaseTest {
// this empty header used to instantiate sampledatasource objects
private static SAMFileHeader header = new SAMFileHeader();
// all the test sample files are located here
private String sampleFilesDir = validationDataLocation + "samples/";
// make sure samples are created from the SAM file correctly
@Test()
public void loadSAMSamplesTest() {
SampleDataSource s = new SampleDataSource(header, null);
}
// tests that a basic sample with relationships loads correctly
// Note that this is the only test for family relationships - we may want to expand this
@Test()
public void basicLoadSampleFileTest() {
File sampleFile = new File(sampleFilesDir + "basicSampleFile.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
Assert.assertTrue(s.sampleCount() == 5);
Sample sampleA = s.getSampleById("sampleA");
Sample sampleB = s.getSampleById("sampleB");
Assert.assertTrue(sampleB.getMother() == sampleA);
Assert.assertTrue(s.getChildren(sampleA).contains(sampleB));
Set<Sample> family = s.getFamily("family1");
Assert.assertTrue(family.size() == 2);
Assert.assertTrue(family.contains(sampleA));
Assert.assertTrue(family.contains(sampleB));
}
// but that file should fail if it has an extra character in it...
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleExtraCharText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraChar.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// ...or a typo...
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleTypoText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxTypo.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// ...or an extra unrecognized array
@Test(expectedExceptions=StingException.class)
public void loadInvalidSampleExtraArrayText() {
File sampleFile = new File(sampleFilesDir + "invalidSyntaxExtraArray.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// make sure aliases work
@Test(expectedExceptions=StingException.class)
public void sampleAliasText() {
File sampleFile = new File(sampleFilesDir + "basicSampleFileWithAlias.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
// this file has two samples, but one has an alias. let's make sure that checks out...
Assert.assertTrue(s.sampleCount() == 3);
Assert.assertTrue(s.getSampleById("sampleA") == s.getSampleById("sampleC"));
}
// error is thrown if property is included that's not in properties array
@Test(expectedExceptions=StingException.class)
public void unallowedPropertySampleTest() {
File sampleFile = new File(sampleFilesDir + "basicSampleFileUnallowedProperty.yaml");
SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
}
// Same rule as unallowedPropertySampleTest, but for an undeclared relationship.
@Test(expectedExceptions=StingException.class)
public void unallowedRelationshipSampleTest() {
    final File invalidFile = new File(sampleFilesDir + "basicSampleFileUnallowedRelationship.yaml");
    new SampleDataSource(header, makeFileList(invalidFile));
}
// Properties for one sample may be spread over two files; both must be visible after loading.
@Test()
public void twoSampleFilesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
    SampleDataSource dataSource = new SampleDataSource(header, sampleFiles);
    Sample sampleA = dataSource.getSampleById("sampleA");
    // propA comes from the first file, propC from the second; both should be merged onto sampleA
    Assert.assertTrue(sampleA.getProperty("propC").equals("valC"));
    Assert.assertTrue(sampleA.getProperty("propA").equals("valA"));
}
// Two files assigning contradictory property values to the same sample must be rejected.
@Test(expectedExceptions=StingException.class)
public void twoContradictorySampleFilesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileInvalidExt.yaml"));
    new SampleDataSource(header, sampleFiles);
}
// Three sample files loaded together: samples and properties from all of them are merged.
@Test()
public void threeSamplesTest() {
    ArrayList<File> sampleFiles = new ArrayList<File>();
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFile.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt.yaml"));
    sampleFiles.add(new File(sampleFilesDir + "basicSampleFileExt2.yaml"));
    SampleDataSource dataSource = new SampleDataSource(header, sampleFiles);
    Assert.assertTrue(dataSource.sampleCount() == 6);
    Assert.assertTrue(dataSource.getSampleById("sampleE").getProperty("propC").equals("valC"));
    Assert.assertTrue(dataSource.getSampleById("sampleA").getProperty("propA").equals("valA"));
}
/**
 * testing getSamplesWithProperty
 * in this file there are 5 samples - 2 with population "CEU", 1 with population "ABC",
 * 1 with no population, and then the default null sample
 */
@Test()
public void getSamplesWithPropertyTest() {
    final File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
    SampleDataSource dataSource = new SampleDataSource(header, makeFileList(sampleFile));
    Assert.assertTrue(dataSource.sampleCount() == 5);
    Set<Sample> ceuSamples = dataSource.getSamplesWithProperty("population", "CEU");
    Assert.assertTrue(ceuSamples.size() == 2);
    // collect the IDs so membership can be checked independent of iteration order
    ArrayList<String> ceuIds = new ArrayList<String>();
    for (Sample ceuSample : ceuSamples)
        ceuIds.add(ceuSample.getId());
    Assert.assertTrue(ceuIds.contains("sampleA"));
    Assert.assertTrue(ceuIds.contains("sampleB"));
}
// make sure we can import data types other than Strings
@Test()
public void sampleTestPropertyType() {
    File sampleFile = new File(sampleFilesDir + "sampleFileOtherTypes.yaml");
    SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
    Sample sample = s.getSampleById("sampleA");
    // YAML scalars should be parsed into their natural Java types, not left as Strings
    Assert.assertTrue(sample.getProperty("a").getClass() == Integer.class);
    Assert.assertTrue(sample.getProperty("b").getClass() == String.class);
    Assert.assertTrue(sample.getProperty("c").getClass() == Double.class);
    // NOTE(review): the original asserted property "b" a second time here — redundant
    // (possibly a typo for another property in the file); the duplicate was removed.
}
/**
 * check that getSamplesByVariantContext works:
 * create a variant context with two sample names, and make sure the right samples are there
 */
@Test()
public void variantContextTest() {
    SampleDataSource s = new SampleDataSource(header, null);
    List<Allele> alleleCollection = new ArrayList<Allele>();
    Allele a1 = Allele.create("A", true);
    alleleCollection.add(a1);
    Set<Genotype> genotypeCollection = new HashSet<Genotype>();
    genotypeCollection.add(new Genotype("NA123", alleleCollection));
    genotypeCollection.add(new Genotype("NA456", alleleCollection));
    VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
    // make sure the set that's returned is the right size; use the interface type
    // instead of the original's raw (HashSet) cast
    Set<Sample> samples = s.getSamplesByVariantContext(v);
    Assert.assertTrue(samples.size() == 2);
    // make sure both samples are included
    ArrayList<String> sampleNames = new ArrayList<String>();
    for (Sample sample : samples)
        sampleNames.add(sample.getId());
    Assert.assertTrue(sampleNames.contains("NA123"));
    Assert.assertTrue(sampleNames.contains("NA456"));
}
/**
 * check that subContextFromSampleProperty works:
 * create a variant context with four sample names, and make sure it filters
 * correctly down to the two samples whose "population" property is "CEU".
 */
@Test()
public void subContextFromSamplePropertyTest() {
    File sampleFile = new File(sampleFilesDir + "sampleFileWithProperties.yaml");
    SampleDataSource s = new SampleDataSource(header, makeFileList(sampleFile));
    Assert.assertTrue(s.sampleCount() == 5);
    List<Allele> alleleCollection = new ArrayList<Allele>();
    Allele a1 = Allele.create("A", true);
    alleleCollection.add(a1);
    Set<Genotype> genotypeCollection = new HashSet<Genotype>();
    genotypeCollection.add(new Genotype("NA123", alleleCollection));
    genotypeCollection.add(new Genotype("sampleA", alleleCollection));
    genotypeCollection.add(new Genotype("sampleB", alleleCollection));
    genotypeCollection.add(new Genotype("sampleC", alleleCollection));
    VariantContext v = new VariantContext("contextName", "chr1", 1, 1, alleleCollection, genotypeCollection);
    VariantContext subContext = s.subContextFromSampleProperty(v, "population", "CEU");
    // the CEU samples in this file are sampleA and sampleB; the original asserted
    // sampleA twice and never checked sampleB
    Assert.assertTrue(subContext.getSampleNames().contains("sampleA"));
    Assert.assertTrue(subContext.getSampleNames().contains("sampleB"));
    Assert.assertTrue(subContext.getSampleNames().size() == 2);
}
/** Convenience: wrap a single sample file in the one-element list the data source expects. */
private ArrayList<File> makeFileList(File file) {
    ArrayList<File> singleton = new ArrayList<File>();
    singleton.add(file);
    return singleton;
}
}

View File

@ -1,64 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.sample;
import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleUnitTest extends BaseTest {
// Fixtures: sampleA1 is a DISTINCT object constructed with the same id and property
// as sampleA, used to exercise equals()/hashCode(); sampleC carries the extra
// properties checked by specialGettersTest.
static Sample sampleA;
static Sample sampleA1;
static Sample sampleB;
static Sample sampleC;
@BeforeClass
public void init() {
sampleA = new Sample("sampleA");
sampleA.setProperty("uniqueProperty", "uniqueValue");
sampleA1 = new Sample("sampleA");
sampleA1.setProperty("uniqueProperty", "uniqueValue");
sampleB = new Sample("sampleB");
sampleC = new Sample("sampleC");
sampleC.setProperty("population", "pop1");
sampleC.setProperty("gender", Sample.Gender.MALE);
}
/**
 * Testing equality: value equality for same-state objects, without reference identity.
 */
@Test()
public void equalsTest() {
Assert.assertTrue(sampleA.equals(sampleA1));
Assert.assertFalse(sampleA == sampleA1);
Assert.assertFalse(sampleA.equals(sampleB));
}
/**
 * And hash: equal objects must hash equally.
 * NOTE(review): the first assertion requires UNEQUAL hashes for unequal samples;
 * the hashCode contract permits collisions, so this could in principle fail for a
 * colliding pair of ids — it just happens to hold for these fixtures.
 */
@Test()
public void basicHashTest() {
Assert.assertFalse(sampleA.hashCode() == sampleB.hashCode());
Assert.assertTrue(sampleA.hashCode() == sampleA1.hashCode());
}
/**
 * Now test the special getter methods (population / gender convenience accessors)
 */
@Test()
public void specialGettersTest() {
Assert.assertTrue(sampleC.getId().equals("sampleC"));
Assert.assertTrue(sampleC.getPopulation().equals("pop1"));
Assert.assertTrue(sampleC.isMale());
Assert.assertFalse(sampleA.isMale()); // sample A doesn't have a gender, so this should be false
}
}

View File

@ -1,6 +1,5 @@
package org.broadinstitute.sting.gatk.iterators;
import net.sf.picard.filter.SamRecordFilter;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
@ -12,7 +11,6 @@ import org.testng.Assert;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.ReadProperties;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.sample.SampleDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -29,11 +27,8 @@ import java.util.*;
* testing of the LocusIteratorByState
*/
public class LocusIteratorByStateUnitTest extends BaseTest {
private final int MAX_READS = 10;
private static SAMFileHeader header;
private LocusIteratorByState li;
private GenomeLocParser genomeLocParser;
@BeforeClass
@ -42,6 +37,11 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
}
private final LocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
return new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),
readAttributes, genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
}
@Test
public void testIndelBaseQualityFiltering() {
final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
@ -68,7 +68,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
li = makeLTBS(reads,readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
@ -120,7 +120,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(before,during,after);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser, new SampleDataSource());
li = makeLTBS(reads,readAttributes);
boolean foundExtendedEventPileup = false;
while (li.hasNext()) {
@ -154,7 +154,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, readAttributes);
// Traditionally, reads that end with indels bleed into the pileup at the following locus. Verify that the next pileup contains this read
// and considers it to be an indel-containing read.
@ -167,7 +167,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
// Turn on extended events, and make sure the event is found.
JVMUtils.setFieldValue(JVMUtils.findField(ReadProperties.class,"generateExtendedEvents"),readAttributes,true);
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, readAttributes);
Assert.assertTrue(li.hasNext(),"LocusIteratorByState with extended events should contain exactly one pileup");
alignmentContext = li.next();
@ -203,7 +203,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),createTestReadProperties(),genomeLocParser,new SampleDataSource());
li = makeLTBS(reads, createTestReadProperties());
int currentLocus = firstLocus;
int numAlignmentContextsFound = 0;
@ -260,7 +260,7 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
List<SAMRecord> reads = Arrays.asList(leadingRead,indelOnlyRead,fullMatchAfterIndel);
// create the iterator by state with the fake reads and fake records
li = new LocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()),readAttributes,genomeLocParser,new SampleDataSource());
li = makeLTBS(reads,readAttributes);
Assert.assertTrue(li.hasNext(),"Missing first locus at " + firstLocus);
AlignmentContext alignmentContext = li.next();

View File

@ -0,0 +1,353 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.samples;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.io.StringReader;
import java.util.*;
/**
* UnitTest for PedReader
*
* @author Mark DePristo
* @since 2011
*/
public class PedReaderUnitTest extends BaseTest {
    private static Logger logger = Logger.getLogger(PedReaderUnitTest.class);

    /** One PED-parsing case: raw file contents plus the samples we expect to read back. */
    private class PedReaderTest extends TestDataProvider {
        public String fileContents;
        public List<Sample> expectedSamples;
        // NOTE(review): never populated by any constructor; kept for interface stability
        EnumSet<PedReader.MissingPedField> missing;

        private PedReaderTest(final String name, final List<Sample> expectedSamples, final String fileContents) {
            super(PedReaderTest.class, name);
            this.fileContents = fileContents;
            this.expectedSamples = expectedSamples;
        }
    }

    // PED column layout:
    // Family ID
    // Individual ID
    // Paternal ID
    // Maternal ID
    // Sex (1=male; 2=female; other=unknown)
    // Phenotype
    //
    // Phenotype encoding:
    // -9 missing
    // 0 missing
    // 1 unaffected
    // 2 affected
    @DataProvider(name = "readerTest")
    public Object[][] createPEDFiles() {
        // Gender
        new PedReaderTest("singleRecordMale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");
        new PedReaderTest("singleRecordFemale",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.FEMALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 2 1");
        new PedReaderTest("singleRecordMissingGender",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.UNKNOWN, Affection.UNKNOWN)),
                "fam1 kid 0 0 0 0");
        // Affection
        new PedReaderTest("singleRecordAffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED)),
                "fam1 kid 0 0 1 2");
        new PedReaderTest("singleRecordUnaffected",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED)),
                "fam1 kid 0 0 1 1");
        new PedReaderTest("singleRecordMissingAffection-9",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 -9");
        new PedReaderTest("singleRecordMissingAffection0",
                Arrays.asList(new Sample("kid", "fam1", null, null, Gender.MALE, Affection.UNKNOWN)),
                "fam1 kid 0 0 1 0");
        // Multiple records / pedigree structure
        new PedReaderTest("multipleUnrelated",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));
        new PedReaderTest("multipleUnrelatedExtraLine",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s%n %n", // note extra newlines and whitespace
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 2"));
        new PedReaderTest("explicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)),
                String.format("%s%n%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1",
                        "fam1 mom 0 0 2 2"));
        new PedReaderTest("implicitTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                "fam1 kid dad mom 1 2");
        new PedReaderTest("partialTrio",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
                        new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad 0 0 1 1"));
        new PedReaderTest("bigPedigree",
                Arrays.asList(
                        new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
                        new Sample("dad", "fam1", "granddad1", "grandma1", Gender.MALE, Affection.UNAFFECTED),
                        new Sample("granddad1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma1", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN),
                        new Sample("mom", "fam1", "granddad2", "grandma2", Gender.FEMALE, Affection.AFFECTED),
                        new Sample("granddad2", "fam1", null, null, Gender.MALE, Affection.UNKNOWN),
                        new Sample("grandma2", "fam1", null, null, Gender.FEMALE, Affection.UNKNOWN)),
                String.format("%s%n%s%n%s",
                        "fam1 kid dad mom 1 2",
                        "fam1 dad granddad1 grandma1 1 1",
                        "fam1 mom granddad2 grandma2 2 2"));
        // Quantitative trait
        new PedReaderTest("OtherPhenotype",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10.0"));
        new PedReaderTest("OtherPhenotypeWithMissing",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.UNKNOWN, Sample.UNSET_QT),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10.0")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 -9",
                        "fam2 s2 0 0 2 10.0"));
        new PedReaderTest("OtherPhenotypeOnlyInts",
                Arrays.asList(
                        new Sample("s1", "fam1", null, null, Gender.MALE, Affection.OTHER, "1"),
                        new Sample("s2", "fam2", null, null, Gender.FEMALE, Affection.OTHER, "10")),
                String.format("%s%n%s",
                        "fam1 s1 0 0 1 1",
                        "fam2 s2 0 0 2 10"));

        return PedReaderTest.getTests(PedReaderTest.class);
    }

    /**
     * Parses myFileContents and asserts the parsed samples equal the test's expectation.
     * TestNG's assertEquals takes (actual, expected) — the original had them reversed,
     * which makes failure messages report the values the wrong way around.
     */
    private static void runTest(PedReaderTest test, String myFileContents, EnumSet<PedReader.MissingPedField> missing) {
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDB sampleDB = new SampleDB();
        List<Sample> readSamples = reader.parse(myFileContents, missing, sampleDB);
        Assert.assertEquals(new HashSet<Sample>(readSamples), new HashSet<Sample>(test.expectedSamples));
    }

    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReader(PedReaderTest test) {
        runTest(test, test.fileContents, EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    /** Comment lines (leading '#') must be skipped by the parser. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReaderWithComments(PedReaderTest test) {
        runTest(test, String.format("#comment%n%s", test.fileContents), EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    /** Semicolons are accepted as record separators in place of newlines. */
    @Test(enabled = true, dataProvider = "readerTest")
    public void testPedReaderWithSemicolons(PedReaderTest test) {
        runTest(test,
                test.fileContents.replace(String.format("%n"), ";"),
                EnumSet.noneOf(PedReader.MissingPedField.class));
    }

    // -----------------------------------------------------------------
    // missing format field tests
    // -----------------------------------------------------------------

    /** A PED case where some columns are declared absent via MissingPedField tags. */
    private class PedReaderTestMissing extends TestDataProvider {
        public EnumSet<PedReader.MissingPedField> missingDesc;
        public EnumSet<PedReader.Field> missingFields;
        public final String fileContents;
        public Sample expected;

        private PedReaderTestMissing(final String name, final String fileContents,
                                     EnumSet<PedReader.MissingPedField> missingDesc,
                                     EnumSet<PedReader.Field> missingFields,
                                     final Sample expected) {
            super(PedReaderTestMissing.class, name);
            this.fileContents = fileContents;
            this.missingDesc = missingDesc;
            this.missingFields = missingFields;
            this.expected = expected;
        }
    }

    @DataProvider(name = "readerTestMissing")
    public Object[][] createPEDFilesWithMissing() {
        new PedReaderTestMissing("missingFam",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID),
                EnumSet.of(PedReader.Field.FAMILY_ID),
                new Sample("kid", null, "dad", "mom", Gender.MALE, Affection.AFFECTED));
        new PedReaderTestMissing("missingParents",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PARENTS),
                EnumSet.of(PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID),
                new Sample("kid", "fam1", null, null, Gender.MALE, Affection.AFFECTED));
        new PedReaderTestMissing("missingSex",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_SEX),
                EnumSet.of(PedReader.Field.GENDER),
                new Sample("kid", "fam1", "dad", "mom", Gender.UNKNOWN, Affection.AFFECTED));
        new PedReaderTestMissing("missingPhenotype",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE),
                EnumSet.of(PedReader.Field.PHENOTYPE),
                new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.UNKNOWN));
        new PedReaderTestMissing("missingEverythingButGender",
                "fam1 kid dad mom 1 2",
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS, PedReader.MissingPedField.NO_FAMILY_ID),
                EnumSet.of(PedReader.Field.FAMILY_ID, PedReader.Field.PATERNAL_ID, PedReader.Field.MATERNAL_ID, PedReader.Field.PHENOTYPE),
                new Sample("kid", null, null, null, Gender.MALE, Affection.UNKNOWN));

        return PedReaderTestMissing.getTests(PedReaderTestMissing.class);
    }

    @Test(enabled = true, dataProvider = "readerTestMissing")
    public void testPedReaderWithMissing(PedReaderTestMissing test) {
        // remove the declared-missing columns from the input before parsing
        final String contents = sliceContents(test.missingFields, test.fileContents);
        logger.warn("Test " + test);
        PedReader reader = new PedReader();
        SampleDB sampleDB = new SampleDB();
        reader.parse(new StringReader(contents), test.missingDesc, sampleDB);
        final Sample missingSample = sampleDB.getSample("kid");
        // (actual, expected) order — the original had these reversed
        Assert.assertEquals(missingSample, test.expected, "Missing field value not expected value for " + test);
    }

    /**
     * Drops the columns named in missingFieldsSet from a single whitespace-separated
     * PED record. Removal runs highest-ordinal-first so earlier removals don't shift
     * the indices of later ones.
     */
    private static String sliceContents(EnumSet<PedReader.Field> missingFieldsSet, String full) {
        List<String> parts = new ArrayList<String>(Arrays.asList(full.split("\\s+")));
        final List<PedReader.Field> missingFields = new ArrayList<PedReader.Field>(missingFieldsSet);
        Collections.reverse(missingFields);
        for ( PedReader.Field field : missingFields )
            parts.remove(field.ordinal());
        return Utils.join("\t", parts);
    }

    // -----------------------------------------------------------------
    // parsing tags
    // -----------------------------------------------------------------

    /** Maps a list of tag strings to the MissingPedField set they should parse into. */
    private class PedReaderTestTagParsing extends TestDataProvider {
        public EnumSet<PedReader.MissingPedField> expected;
        public final List<String> tags;

        private PedReaderTestTagParsing(final List<String> tags, EnumSet<PedReader.MissingPedField> missingDesc) {
            super(PedReaderTestTagParsing.class);
            this.tags = tags;
            this.expected = missingDesc;
        }
    }

    @DataProvider(name = "readerTestTagParsing")
    public Object[][] createReaderTestTagParsing() {
        new PedReaderTestTagParsing(
                Collections.<String>emptyList(),
                EnumSet.noneOf(PedReader.MissingPedField.class));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_FAMILY_ID"),
                EnumSet.of(PedReader.MissingPedField.NO_FAMILY_ID));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_PARENTS"),
                EnumSet.of(PedReader.MissingPedField.NO_PARENTS));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_PHENOTYPE"),
                EnumSet.of(PedReader.MissingPedField.NO_PHENOTYPE));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX", "NO_PHENOTYPE"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE));
        new PedReaderTestTagParsing(
                Arrays.asList("NO_SEX", "NO_PHENOTYPE", "NO_PARENTS"),
                EnumSet.of(PedReader.MissingPedField.NO_SEX, PedReader.MissingPedField.NO_PHENOTYPE, PedReader.MissingPedField.NO_PARENTS));

        return PedReaderTestTagParsing.getTests(PedReaderTestTagParsing.class);
    }

    @Test(enabled = true, dataProvider = "readerTestTagParsing")
    public void testPedReaderTagParsing(PedReaderTestTagParsing test) {
        EnumSet<PedReader.MissingPedField> parsed = PedReader.parseMissingFieldTags("test", test.tags);
        // (actual, expected) order — the original had these reversed
        Assert.assertEquals(parsed, test.expected, "Failed to properly parse tags " + test.tags);
    }

    /** An unrecognized tag must be rejected. */
    @Test(enabled = true, expectedExceptions = UserException.class)
    public void testPedReaderTagParsing1() {
        PedReader.parseMissingFieldTags("test", Arrays.asList("XXX"));
    }

    /** A bad tag mixed with a valid one must still be rejected. */
    @Test(enabled = true, expectedExceptions = UserException.class)
    public void testPedReaderTagParsing2() {
        PedReader.parseMissingFieldTags("test", Arrays.asList("NO_SEX", "XXX"));
    }
}

View File

@ -0,0 +1,157 @@
package org.broadinstitute.sting.gatk.samples;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.util.*;
/**
* Created by IntelliJ IDEA.
* User: brett
* Date: Sep 9, 2010
* Time: 8:21:00 AM
*/
public class SampleDBUnitTest extends BaseTest {
    private static SampleDBBuilder builder;

    // all the test sample files are located here
    private File testPED = new File(testDir + "ceutrio.ped");

    // the trio we expect back from parsing testPED / testPEDString
    private static final Set<Sample> testPEDSamples = new HashSet<Sample>(Arrays.asList(
            new Sample("kid", "fam1", "dad", "mom", Gender.MALE, Affection.AFFECTED),
            new Sample("dad", "fam1", null, null, Gender.MALE, Affection.UNAFFECTED),
            new Sample("mom", "fam1", null, null, Gender.FEMALE, Affection.AFFECTED)));

    // the same individuals as seen through a SAM header alone: no pedigree information
    private static final Set<Sample> testSAMSamples = new HashSet<Sample>(Arrays.asList(
            new Sample("kid", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
            new Sample("mom", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN),
            new Sample("dad", null, null, null, Gender.UNKNOWN, Affection.UNKNOWN)));

    private static final String testPEDString =
            String.format("%s%n%s%n%s",
                    "fam1 kid dad mom 1 2",
                    "fam1 dad 0 0 1 1",
                    "fam1 mom 0 0 2 2");

    private static final String testPEDMultipleFamilies =
            String.format("%s%n%s%n%s%n%s%n%s",
                    "fam1 kid dad mom 1 2",
                    "fam1 dad 0 0 1 1",
                    "fam1 mom 0 0 2 2",
                    "fam3 s1 d1 m1 2 2",
                    "fam2 s2 d2 m2 2 2");

    // "kid" redeclared as female, contradicting the male record in testPED
    private static final String testPEDStringInconsistentGender =
            "fam1 kid 0 0 2 2";

    // NOTE(review): testPEDSamples is already a Set, so this copy is redundant;
    // kept as an alias for readability of the assertions below.
    private static final Set<Sample> testPEDSamplesAsSet =
            new HashSet<Sample>(testPEDSamples);

    @BeforeMethod
    public void before() {
        builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
    }

    // NOTE: throughout, TestNG's assertEquals takes (actual, expected); the original
    // passed them reversed, which produced misleading failure messages.

    @Test()
    public void loadPEDFile() {
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamplesAsSet);
    }

    @Test()
    public void loadPEDString() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDString));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamplesAsSet);
    }

    /** Registers read groups for kid/mom/dad on an artificial SAM header. */
    private static void addSAMHeader() {
        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
        ArtificialSAMUtils.createEnumeratedReadGroups(header, Arrays.asList("1", "2", "3"),
                Arrays.asList("kid", "mom", "dad"));
        builder.addSamplesFromSAMHeader(header);
    }

    @Test()
    public void loadSAMHeader() {
        addSAMHeader();
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testSAMSamples);
    }

    @Test()
    public void loadSAMHeaderPlusPED() {
        // pedigree information should enrich the bare SAM-header samples
        addSAMHeader();
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamples);
    }

    @Test()
    public void loadDuplicateData() {
        // loading the same consistent pedigree twice is allowed and idempotent
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testPEDSamples);
    }

    @Test(expectedExceptions = UserException.class)
    public void loadNonExistentFile() {
        builder.addSamplesFromPedigreeFiles(Arrays.asList(new File("non-existence-file.txt")));
        // NOTE(review): if the expected UserException fires above (it should), the
        // lines below never execute; they are retained only to mirror the original.
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getSamples(), testSAMSamples);
    }

    @Test(expectedExceptions = UserException.class)
    public void loadInconsistentData() {
        builder = new SampleDBBuilder(PedigreeValidationType.STRICT);
        builder.addSamplesFromPedigreeFiles(Arrays.asList(testPED));
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
        builder.getFinalSampleDB();
    }

    @Test(expectedExceptions = UserException.class)
    public void sampleInSAMHeaderNotInSamplesDB() {
        addSAMHeader();
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDStringInconsistentGender));
        builder.getFinalSampleDB();
    }

    @Test()
    public void getFamilyIDs() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getFamilyIDs(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));
    }

    @Test()
    public void getFamily() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Assert.assertEquals(db.getFamily("fam1"), testPEDSamplesAsSet);
    }

    @Test()
    public void loadFamilyIDs() {
        builder.addSamplesFromPedigreeStrings(Arrays.asList(testPEDMultipleFamilies));
        SampleDB db = builder.getFinalSampleDB();
        Map<String, Set<Sample>> families = db.getFamilies();
        Assert.assertEquals(families.size(), 3);
        Assert.assertEquals(families.keySet(), new TreeSet<String>(Arrays.asList("fam1", "fam2", "fam3")));

        // every family is a trio whose members all report the family's ID
        for ( final String famID : families.keySet() ) {
            final Set<Sample> fam = families.get(famID);
            Assert.assertEquals(fam.size(), 3);
            for ( final Sample sample : fam ) {
                Assert.assertEquals(sample.getFamilyID(), famID);
            }
        }
    }
}

View File

@ -0,0 +1,64 @@
package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
/**
*
*/
public class SampleUnitTest extends BaseTest {
    SampleDB db;
    // "fam1" trio: 1A is the child of father 1B and mother 1C
    static Sample fam1A, fam1B, fam1C;
    // samples with no pedigree or phenotype information
    static Sample s1, s2;
    // one sample per Affection state, plus two OTHER-phenotype payload variants
    static Sample trait1, trait2, trait3, trait4, trait5;

    @BeforeClass
    public void init() {
        db = new SampleDB();

        fam1A = new Sample("1A", db, "fam1", "1B", "1C", Gender.UNKNOWN);
        fam1B = new Sample("1B", db, "fam1", null, null, Gender.MALE);
        fam1C = new Sample("1C", db, "fam1", null, null, Gender.FEMALE);

        s1 = new Sample("s1", db);
        s2 = new Sample("s2", db);

        trait1 = new Sample("t1", db, Affection.AFFECTED, Sample.UNSET_QT);
        trait2 = new Sample("t2", db, Affection.UNAFFECTED, Sample.UNSET_QT);
        trait3 = new Sample("t3", db, Affection.UNKNOWN, Sample.UNSET_QT);
        trait4 = new Sample("t4", db, Affection.OTHER, "1.0");
        // was "t4" in the original — an apparent copy-paste slip that gave two
        // different samples the same ID
        trait5 = new Sample("t5", db, Affection.OTHER, "CEU");
    }

    /**
     * Basic getters: IDs, pedigree links, and phenotype accessors.
     * assertEquals arguments are ordered (actual, expected) per TestNG's contract;
     * the original had them reversed.
     */
    @Test()
    public void normalGettersTest() {
        Assert.assertEquals(fam1A.getID(), "1A");
        Assert.assertEquals(fam1A.getFamilyID(), "fam1");
        Assert.assertEquals(fam1A.getPaternalID(), "1B");
        Assert.assertEquals(fam1A.getMaternalID(), "1C");
        Assert.assertNull(fam1B.getPaternalID());
        Assert.assertNull(fam1B.getMaternalID());

        Assert.assertEquals(trait1.getAffection(), Affection.AFFECTED);
        Assert.assertEquals(trait1.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait2.getAffection(), Affection.UNAFFECTED);
        Assert.assertEquals(trait2.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait3.getAffection(), Affection.UNKNOWN);
        Assert.assertEquals(trait3.getOtherPhenotype(), Sample.UNSET_QT);
        Assert.assertEquals(trait4.getAffection(), Affection.OTHER);
        Assert.assertEquals(trait4.getOtherPhenotype(), "1.0");
        Assert.assertEquals(trait5.getOtherPhenotype(), "CEU");
    }

    /** Gender is stored and returned per-sample. */
    @Test()
    public void testGenders() {
        Assert.assertEquals(fam1A.getGender(), Gender.UNKNOWN);
        Assert.assertEquals(fam1B.getGender(), Gender.MALE);
        Assert.assertEquals(fam1C.getGender(), Gender.FEMALE);
    }
}

View File

@ -57,11 +57,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
// now add the expected files that get generated
spec.addAuxFile("423571e4c05e7934322172654ac6dbb7", baseOutputFile);
spec.addAuxFile("9df5e7e07efeb34926c94a724714c219", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_counts"));
spec.addAuxFile("b9a7748e5aec4dc06daed893c901c00d", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("229b9b5bc2141c86dbc69c8acc9eba6a", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_cumulative_coverage_proportions"));
spec.addAuxFile("9cd395f47b329b9dd00ad024fcac9929", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_statistics"));
spec.addAuxFile("aec669d64d9dd652dd088a5341835ea5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("f6dbd74d32a48abe71ce08d300bce983", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("e3a3467ed259ee3680f8d01980f525b7", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("471c34ad2e4f7228efd20702d5941ba9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("9667c77284c2c08e647b162d0e9652d4", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_statistics"));
spec.addAuxFile("5a96c75f96d6fa6ee617451d731dae37", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_summary"));
spec.addAuxFile("b82846df660f0aac8429aec57c2a62d6", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_counts"));
spec.addAuxFile("d32a8c425fadcc4c048bd8b48d0f61e5", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_cumulative_coverage_proportions"));
spec.addAuxFile("7b9d0e93bf5b5313995be7010ef1f528", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_interval_statistics"));
@ -69,11 +69,11 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.addAuxFile("e70952f241eebb9b5448f2e7cb288131", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_statistics"));
spec.addAuxFile("054ed1e184f46d6a170dc9bf6524270c", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".read_group_summary"));
spec.addAuxFile("d53431022f7387fe9ac47814ab1fcd88", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_counts"));
spec.addAuxFile("650ee3714da7fbad7832c9d4ad49eb51", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("a395dafde101971d2b9e5ddb6cd4b7d0", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_cumulative_coverage_proportions"));
spec.addAuxFile("df0ba76e0e6082c0d29fcfd68efc6b77", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_statistics"));
spec.addAuxFile("7dcac2e8962c778081486332a4576dc3", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("a50011571334f17e950ad3ed1149e350", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("6f3260504295695d765af639539585c9", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
spec.addAuxFile("e013cb5b11b0321a81c8dbd7c1863787", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_interval_summary"));
spec.addAuxFile("661160f571def8c323345b5859cfb9da", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_statistics"));
spec.addAuxFile("c95a7a6840334cadd0e520939615c77b", createTempFileFromBase(baseOutputFile.getAbsolutePath()+".sample_summary"));
execute("testBaseOutputNoFiltering",spec);
}
@ -90,7 +90,7 @@ public class DepthOfCoverageIntegrationTest extends WalkerTest {
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("6ccd7d8970ba98cb95fe41636a070c1c",baseOutputFile);
spec.addAuxFile("0ee40f3e5091536c14e077b77557083a",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
spec.addAuxFile("7d87783b3d98b928cac16d383ceca807",createTempFileFromBase(baseOutputFile.getAbsolutePath()+".library_interval_summary"));
execute("testNoCoverageDueToFiltering",spec);
}

View File

@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
public void testMultiSamplePilot1() {
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
Arrays.asList("e6639ea2dc81635c706e6c35921406d7"));
Arrays.asList("b27939251539439a382538e507e03507"));
executeTest("test MultiSample Pilot1", spec);
}
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
WalkerTest.WalkerTestSpec spec4 = new WalkerTest.WalkerTestSpec(
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "ALL.wgs.union_v2_chr20_100_110K.20101123.indels.sites.vcf -I " + validationDataLocation +
"phase1_GBR_realigned.chr20.100K-110K.bam -o %s -L 20:100,000-110,000", 1,
Arrays.asList("4be308fd9e8167ebee677f62a7a753b7"));
Arrays.asList("37e891bf1ac40caec9ea228f39c27e44"));
executeTest("test MultiSample 1000G Phase1 indels with complicated records emitting all sites", spec4);
}

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.Test;

View File

@ -25,16 +25,14 @@
package org.broadinstitute.sting.utils.clipreads;
import net.sf.samtools.*;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: roger

View File

@ -76,7 +76,7 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file
File baseOutputFile = createTempFile("testUnmappedReadInclusion",".bam");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("99c266d777e2e167b8153c858c305fda",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("748a38ed5eb0a043dfc7b82f0d1e8063",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("fadcdf88597b9609c5f2a17f4c6eb455", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testUnmappedReadInclusion",spec);
@ -96,8 +96,8 @@ public class IntervalIntegrationTest extends WalkerTest {
// our base file
File baseOutputFile = createTempFile("testUnmappedReadExclusion",".bam");
spec.setOutputFileLocation(baseOutputFile);
spec.addAuxFile("8236f0b2df5a692e54751b08bc3836fa",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("651b42456d31ba24e913297b71b32143", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
spec.addAuxFile("80887ba488e53dabd9596ff93070ae75",createTempFileFromBase(baseOutputFile.getAbsolutePath()));
spec.addAuxFile("b341d808ecc33217f37c0c0cde2a3e2f", createTempFileFromBase(baseOutputFile.getAbsolutePath().substring(0,baseOutputFile.getAbsolutePath().indexOf(".bam"))+".bai"));
executeTest("testUnmappedReadExclusion",spec);
}

View File

@ -28,7 +28,6 @@ import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMReadGroupRecord;
import net.sf.samtools.SAMRecord;
import org.testng.Assert;
import org.broadinstitute.sting.gatk.datasources.sample.Sample;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.testng.annotations.Test;
@ -141,9 +140,9 @@ public class ReadBackedPileupUnitTest {
ReadBackedPileupImpl sample2Pileup = new ReadBackedPileupImpl(null,
Arrays.asList(read2,read4),
Arrays.asList(1,1));
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
sampleToPileupMap.put(new Sample(readGroupOne.getSample()),sample1Pileup);
sampleToPileupMap.put(new Sample(readGroupTwo.getSample()),sample2Pileup);
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
sampleToPileupMap.put(readGroupOne.getSample(),sample1Pileup);
sampleToPileupMap.put(readGroupTwo.getSample(),sample2Pileup);
ReadBackedPileup compositePileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
@ -164,13 +163,13 @@ public class ReadBackedPileupUnitTest {
@Test
public void testGetPileupForSample() {
Sample sample1 = new Sample("sample1");
Sample sample2 = new Sample("sample2");
String sample1 = "sample1";
String sample2 = "sample2";
SAMReadGroupRecord readGroupOne = new SAMReadGroupRecord("rg1");
readGroupOne.setSample(sample1.getId());
readGroupOne.setSample(sample1);
SAMReadGroupRecord readGroupTwo = new SAMReadGroupRecord("rg2");
readGroupTwo.setSample(sample2.getId());
readGroupTwo.setSample(sample2);
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1,1,1000);
header.addReadGroup(readGroupOne);
@ -181,24 +180,20 @@ public class ReadBackedPileupUnitTest {
SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,10);
read2.setAttribute("RG",readGroupTwo.getId());
Map<Sample,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<Sample,ReadBackedPileupImpl>();
Map<String,ReadBackedPileupImpl> sampleToPileupMap = new HashMap<String,ReadBackedPileupImpl>();
sampleToPileupMap.put(sample1,new ReadBackedPileupImpl(null,Collections.singletonList(read1),0));
sampleToPileupMap.put(sample2,new ReadBackedPileupImpl(null,Collections.singletonList(read2),0));
ReadBackedPileup pileup = new ReadBackedPileupImpl(null,sampleToPileupMap);
ReadBackedPileup sample1Pileup = pileup.getPileupForSample(sample1);
Assert.assertEquals(sample1Pileup.size(),1,"Sample 1 pileup has wrong number of elements");
Assert.assertEquals(sample1Pileup.getReads().get(0),read1,"Sample 1 pileup has incorrect read");
ReadBackedPileup sample2Pileup = pileup.getPileupForSampleName(sample2.getId());
ReadBackedPileup sample2Pileup = pileup.getPileupForSample(sample2);
Assert.assertEquals(sample2Pileup.size(),1,"Sample 2 pileup has wrong number of elements");
Assert.assertEquals(sample2Pileup.getReads().get(0),read2,"Sample 2 pileup has incorrect read");
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample(new Sample("missing"));
ReadBackedPileup missingSamplePileup = pileup.getPileupForSample("missing");
Assert.assertNull(missingSamplePileup,"Pileup for sample 'missing' should be null but isn't");
missingSamplePileup = pileup.getPileupForSampleName("not here");
missingSamplePileup = pileup.getPileupForSample("not here");
Assert.assertNull(missingSamplePileup,"Pileup for sample 'not here' should be null but isn't");
}
}

View File

@ -0,0 +1,51 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.variantcontext;
import org.testng.annotations.Test;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.net.URLClassLoader;
/**
* Test to ensure that, given only the VCF jar and its expected dependencies, core VCF classes will load.
*/
public class VCFJarClassLoadingUnitTest {
@Test
public void testVCFJarClassLoading() throws ClassNotFoundException, MalformedURLException {
URI vcfURI = new File("dist/vcf.jar").toURI();
URI tribbleURI = new File("lib/tribble-24.jar").toURI();
ClassLoader classLoader = new URLClassLoader(new URL[] {vcfURI.toURL(),tribbleURI.toURL()}, null);
classLoader.loadClass("org.broadinstitute.sting.utils.variantcontext.VariantContext");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFCodec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCF3Codec");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.VCFWriter");
classLoader.loadClass("org.broadinstitute.sting.utils.codecs.vcf.StandardVCFWriter");
}
}

View File

@ -1,202 +0,0 @@
package org.broadinstitute.sting.queue.qscripts
import org.broadinstitute.sting.queue.QScript
import org.broadinstitute.sting.queue.extensions.gatk.RodBind
import org.broadinstitute.sting.queue.extensions.gatk._
class StandardVariantEvaluation extends QScript {
// todo -- update to released version when things stabilize
@Argument(doc="gatkJarFile", required=false)
var gatkJarFile: File = new File("/home/radon01/depristo/dev/GenomeAnalysisTKFromLaptop/trunk/dist/GenomeAnalysisTK.jar")
@Argument(shortName = "R", doc="B37 reference sequence: defaults to broad standard location", required=false)
var referenceFile: File = new File("/humgen/1kg/reference/human_g1k_v37.fasta")
@Argument(shortName = "intervals", doc="intervals to evaluate. Only supports evaluation on chromosome 20 now, as most evaluation data is there", required=false)
val TARGET_INTERVAL: String = "20"
@Argument(shortName = "includeUnion", doc="If provided, we'll create a union of the evaluation data sets for evaluation", required=false)
val CREATE_UNION: Boolean = false
@Argument(shortName = "dataDir", doc="Path to the standard evaluation data files", required=false)
val DATA_DIR = "/humgen/gsa-hpprojects/GATK/data/Comparisons/StandardForEvaluation/b37/"
@Argument(shortName = "evalStandard1000GCalls", doc="If provided, we'll include some standard 1000G data for evaluation", required=false)
val EVAL_STANDARD_1000G_CALLS: Boolean = false
val COMPS_DIR = DATA_DIR + "/comps/"
val EVALS_DIR = DATA_DIR + "/evals/"
@Argument(shortName = "moreSNPsToEval", doc="Path to additional SNP call sets for evaluation", required=false)
val moreSNPsToEval: List[File] = Nil
@Argument(shortName = "moreIndelsToEval", doc="Path to additional Indel call sets for evaluation", required=false)
val moreIndelsToEval: List[File] = Nil
val VARIANT_TYPES: List[String] = List("indels", "snps")
val VARIANT_TYPE_VT: Map[String, List[org.broad.tribble.util.variantcontext.VariantContext.Type]] = Map(
"indels" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.INDEL, org.broad.tribble.util.variantcontext.VariantContext.Type.MIXED, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION),
"snps" -> List(org.broad.tribble.util.variantcontext.VariantContext.Type.SNP, org.broad.tribble.util.variantcontext.VariantContext.Type.NO_VARIATION)
)
val SITES_DIR: String = "sitesFiles"
// path to b37 DBSNP
@Argument(shortName = "dbsnp", doc="Path to DBSNP **VCF** for evaluation", required=false)
val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/Comparisons/Validated/dbSNP/dbsnp_129_b37.leftAligned.vcf")
//val MY_DBSNP: File = new File("/humgen/gsa-hpprojects/GATK/data/dbsnp_132_b37.leftAligned.vcf");
class Comp(val name: String, val evalType: String, val filename: String, val MakeHomVar: Boolean = false) {
val originalFile = new File(COMPS_DIR + filename)
val file: File = if ( MakeHomVar ) swapExt(originalFile, ".vcf",".homvar.vcf") else originalFile
val sitesFile = new File(SITES_DIR + "/" + swapExt(file, ".vcf", ".sites.vcf").getName)
}
class Eval(val name: String, val evalType: String, val filename: String, val overrideFile: File = null ) {
val file: File = if ( overrideFile != null ) overrideFile else new File(EVALS_DIR + "/" + filename)
}
var COMPS: List[Comp] = Nil
def addComp(comp: Comp) { COMPS = comp :: COMPS }
var EVALS: List[Eval] = Nil
def addEval(eval: Eval) { EVALS = eval :: EVALS }
def addEvalFromCMD(file: File, t: String) { addEval(new Eval(file.getName, t, null, file)) }
trait UNIVERSAL_GATK_ARGS extends CommandLineGATK {
this.logging_level = "INFO";
this.jarFile = gatkJarFile;
this.intervalsString = List(TARGET_INTERVAL);
this.reference_sequence = referenceFile;
this.memoryLimit = 2
}
def initializeStandardDataFiles() = {
//
// Standard evaluation files for indels
//
addComp(new Comp("NA12878.homvar.GATK", "indels", "Indels.NA12878_WGS.filtered_Q50.0_QD5.0_SB-1.0_HR18.vcf", true))
addComp(new Comp("CG.38samples", "indels", "CG.Indels.leftAligned.b37.vcf"))
addComp(new Comp("NA12878.homvar.CG", "indels", "NA12878.CG.b37.indels.vcf", true))
addComp(new Comp("g1k.pilot1.validation", "indels", "pilot1_indel_validation_2009.b37.vcf"))
addComp(new Comp("NA12878.hand_curated", "indels", "NA12878.validated.curated.polymorphic.indels.vcf"))
addComp(new Comp("NA12878.Mullikin", "indels", "NA12878.DIPline.NQScm.expanded.chr20.b37.minReads_2_or_gt2bp.vcf"))
//
// INDEL call sets
//
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("dindel", "indels", "20110208.chr20.dindel2.EUR.sites.vcf"))
addEval(new Eval("si", "indels", "20101123.chr20.si.v2.EUR.sites.vcf"))
addEval(new Eval("gatk", "indels", "EUR.phase1.chr20.broad.filtered.indels.sites.vcf"))
}
//
// Standard evaluation files for SNPs
//
addComp(new Comp("NA12878.homvar.GATK", "snps", "NA12878.HiSeq19.cut.vcf", true))
addComp(new Comp("CG.38samples", "snps", "CG.38samples.b37.vcf"))
addComp(new Comp("NA12878.homvar.CG", "snps", "NA12878.CG.b37.snps.vcf", true))
addComp(new Comp("HapMap3.3", "snps", "hapmap3.3.sites_r27_nr.b37_fwd.vcf"))
addComp(new Comp("OMNI.2.5M", "snps", "omni2.5.1212samples.b37.sites.chr20.monoAreAC0.vcf"))
addComp(new Comp("g1k.pilot1.validation", "snps", "1000G.snp.validation.b37.vcf"))
//
// SNP call sets
//
if ( EVAL_STANDARD_1000G_CALLS ) {
addEval(new Eval("1000G.gatk.eurPlus.phase1", "snps", "EUR+.phase1.chr20.broad.recal.vrcut1p0.sites.vcf"))
addEval(new Eval("1000G.high_specificity.phase1", "snps", "ALL.phase1.chr20.projectConsensus.highSpecificity.snps.genotypes.sites.vcf"))
}
}
def script = {
val sitesDir = new File(SITES_DIR)
if ( ! sitesDir.exists ) sitesDir.mkdirs()
initializeStandardDataFiles();
// add additional files for evaluation, if necessary
moreSNPsToEval.foreach(addEvalFromCMD(_, "snps"))
moreIndelsToEval.foreach(addEvalFromCMD(_, "indels"))
//
// create hom-var versions of key files
//
for ( comp <- COMPS )
if ( comp.MakeHomVar )
add(new SelectHomVars(comp.originalFile, comp.file))
for ( comp <- COMPS )
add(new JustSites(comp.file, comp.sitesFile))
//
// Loop over evaluation types
//
for ( evalType <- VARIANT_TYPES ) {
var evalsOfType = EVALS.filter(_.evalType == evalType)
val compsOfType = COMPS.filter(_.evalType == evalType)
if ( evalsOfType.size > 0 ) {
// if desired and possible, create a union.X.vcf file
if ( CREATE_UNION && evalsOfType.size > 1 ) {
val union: File = new File("union.%s.vcf".format(evalType))
add(new MyCombine(evalsOfType.map(_.file), union));
evalsOfType = new Eval("union", evalType, null, union) :: evalsOfType
}
// our root VE
val VE = new MyEval()
VE.VT = VARIANT_TYPE_VT(evalType)
VE.o = new File(evalType + ".eval")
// add evals
for ( calls <- evalsOfType )
VE.rodBind :+= RodBind("eval_" + calls.name, "VCF", calls.file)
// add comps
//VE.rodBind :+= RodBind("dbsnp", "VCF", MY_DBSNP)
for ( comp <- compsOfType )
VE.rodBind :+= RodBind("comp_" + comp.name, "VCF", comp.sitesFile)
add(VE)
}
}
}
/**
* Select homozygous non-reference sites from a single deep data set
*/
class SelectHomVars(@Input(doc="foo") vcf: File, @Output(doc="foo") out: File) extends SelectVariants with UNIVERSAL_GATK_ARGS {
this.rodBind :+= RodBind("variant", "VCF", vcf)
this.o = out
this.select ++= List("\"AC == 2\"")
}
/**
* A simple union
*/
class MyCombine(@Input(doc="foo") vcfs: List[File], @Output(doc="foo") out: File) extends CombineVariants with UNIVERSAL_GATK_ARGS {
for ( vcf <- vcfs )
this.rodBind :+= RodBind(vcf.getName, "VCF", vcf)
this.o = out
}
/**
* A command line (cut) that removes all genotyping information from a file
*/
class JustSites(@Input(doc="foo") in: File, @Output(doc="foo") out: File) extends CommandLineFunction {
def commandLine = "cut -f 1-8 %s > %s".format(in, out)
}
/**
* Base class for VariantEval used here
*/
class MyEval() extends VariantEval with UNIVERSAL_GATK_ARGS {
this.noST = true
this.evalModule :+= "ValidationReport"
}
}

3
public/testdata/ceutrio.ped vendored 100644
View File

@ -0,0 +1,3 @@
fam1 kid dad mom 1 2
fam1 dad 0 0 1 1
fam1 mom 0 0 2 2