auto-merging to latest version

2012-09-07 11:36:47 -04:00 · 2012-09-07 11:36:47 -04:00 · 81b27f9db2
parent df749b27c1 f25bf0f927
commit 81b27f9db2
108 changed files with 6457 additions and 1408 deletions
--- a/build.xml
+++ b/build.xml
@ -577,6 +577,7 @@
                 docletpathref="doclet.classpath"
                 classpathref="external.dependencies"
                 classpath="${java.classes}"
+		 maxmemory="2g"
                 additionalparam="-build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -out ${basedir}/${resource.path} -quiet">
            <sourcefiles>
                <union>
@ -780,6 +781,7 @@
                 docletpathref="doclet.classpath"
                 classpathref="external.dependencies"
                 classpath="${java.classes}"
+		 maxmemory="2g"
                 additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet"> <!-- -test to only do DocumentationTest walker -->
            <sourcefiles>
                <fileset refid="java.source.files"/>
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@ -546,7 +546,7 @@ public class SlidingWindow {

        FractionalDownsampler <GATKSAMRecord> downsampler = new FractionalDownsampler<GATKSAMRecord>(fraction);
        downsampler.submit(allReads);
-        return downsampler.consumeDownsampledItems();
+        return downsampler.consumeFinalizedItems();
    }


--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@ -112,31 +112,31 @@ public class CommandLineGATK extends CommandLineExecutable {
        }
    }

-    protected static final String PICARD_TEXT_SAM_FILE_ERROR_1 = "Cannot use index file with textual SAM file";
-    protected static final String PICARD_TEXT_SAM_FILE_ERROR_2 = "Cannot retrieve file pointers within SAM text files";
+    public static final String PICARD_TEXT_SAM_FILE_ERROR_1 = "Cannot use index file with textual SAM file";
+    public static final String PICARD_TEXT_SAM_FILE_ERROR_2 = "Cannot retrieve file pointers within SAM text files";
    private static void checkForMaskedUserErrors(final Throwable t) {
        final String message = t.getMessage();
        if ( message == null )
            return;

        // we know what to do about the common "Too many open files" error
-        if ( message.indexOf("Too many open files") != -1 )
+        if ( message.contains("Too many open files") )
            exitSystemWithUserError(new UserException.TooManyOpenFiles());

        // malformed BAM looks like a SAM file
-        if ( message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_1) != -1 ||
-                message.indexOf(PICARD_TEXT_SAM_FILE_ERROR_2) != -1 )
+        if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) ||
+                message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
            exitSystemWithSamError(t);

        // can't close tribble index when writing
-        if ( message.indexOf("Unable to close index for") != -1 )
+        if ( message.contains("Unable to close index for") )
            exitSystemWithUserError(new UserException(t.getCause() == null ? message : t.getCause().getMessage()));

        // disk is full
-        if ( message.indexOf("No space left on device") != -1 )
-            exitSystemWithUserError(new UserException(t.getMessage()));
-        if ( t.getCause() != null && t.getCause().getMessage().indexOf("No space left on device") != -1 )
-            exitSystemWithUserError(new UserException(t.getCause().getMessage()));
+        if ( message.contains("No space left on device") )
+            exitSystemWithUserError(new UserException.NoSpaceOnDevice());
+        if ( t.getCause() != null && t.getCause().getMessage().contains("No space left on device") )
+            exitSystemWithUserError(new UserException.NoSpaceOnDevice());
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/DownsamplingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/DownsamplingMethod.java
@ -1,52 +0,0 @@
-package org.broadinstitute.sting.gatk;
-
-import org.broadinstitute.sting.utils.exceptions.UserException;
-
-/**
- * Describes the method for downsampling reads at a given locus.
- *
- * @author hanna
- * @version 0.1
- */
-
-public class DownsamplingMethod {
-    /**
-     * Type of downsampling to perform.
-     */
-    public final DownsampleType type;
-
-    /**
-     * Actual downsampling target is specified as an integer number of reads.
-     */
-    public final Integer toCoverage;
-
-    /**
-     * Actual downsampling target is specified as a fraction of total available reads.
-     */
-    public final Double toFraction;
-
-    /**
-     * Expresses no downsampling applied at all.
-     */
-    public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null);
-
-    public DownsamplingMethod(DownsampleType type, Integer toCoverage, Double toFraction) {
-        // Do some basic sanity checks on the downsampling parameters passed in.
-
-        // Can't leave toFraction and toCoverage null unless type is experimental naive duplicate eliminator.
-        if(type != DownsampleType.NONE && toFraction == null && toCoverage == null)
-            throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
-
-        // Fraction and coverage cannot both be specified.
-        if(toFraction != null && toCoverage != null)
-            throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified.  Please choose only one.");
-
-        // Experimental by sample downsampling does not work with a fraction of reads.
-        if(type == DownsampleType.BY_SAMPLE && toFraction != null)
-            throw new UserException.CommandLineException("Cannot downsample to fraction with new EXPERIMENTAL_BY_SAMPLE method");
-
-        this.type = type;
-        this.toCoverage = toCoverage;
-        this.toFraction = toFraction;
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@ -36,6 +36,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.datasources.reads.*;
 import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.executive.MicroScheduler;
 import org.broadinstitute.sting.gatk.filters.FilterManager;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
@ -143,6 +144,8 @@ public class GenomeAnalysisEngine {
     */
    private ThreadAllocation threadAllocation;

+    private ReadMetrics cumulativeMetrics = null;
+
    /**
     * A currently hacky unique name for this GATK instance
     */
@ -398,28 +401,22 @@ public class GenomeAnalysisEngine {
     * Parse out the thread allocation from the given command-line argument.
     */
    private void determineThreadAllocation() {
-        Tags tags = parsingEngine.getTags(argCollection.numberOfThreads);
+        if ( argCollection.numberOfDataThreads < 1 ) throw new UserException.BadArgumentValue("num_threads", "cannot be less than 1, but saw " + argCollection.numberOfDataThreads);
+        if ( argCollection.numberOfCPUThreadsPerDataThread < 1 ) throw new UserException.BadArgumentValue("num_cpu_threads", "cannot be less than 1, but saw " + argCollection.numberOfCPUThreadsPerDataThread);
+        if ( argCollection.numberOfIOThreads < 0 ) throw new UserException.BadArgumentValue("num_io_threads", "cannot be less than 0, but saw " + argCollection.numberOfIOThreads);

-        // TODO: Kill this complicated logic once Queue supports arbitrary tagged parameters.
-        Integer numCPUThreads = null;
-        if(tags.containsKey("cpu") && argCollection.numberOfCPUThreads != null)
-            throw new UserException("Number of CPU threads specified both directly on the command-line and as a tag to the nt argument.  Please specify only one or the other.");
-        else if(tags.containsKey("cpu"))
-            numCPUThreads = Integer.parseInt(tags.getValue("cpu"));
-        else if(argCollection.numberOfCPUThreads != null)
-            numCPUThreads = argCollection.numberOfCPUThreads;
-
-        Integer numIOThreads = null;
-        if(tags.containsKey("io") && argCollection.numberOfIOThreads != null)
-            throw new UserException("Number of IO threads specified both directly on the command-line and as a tag to the nt argument.  Please specify only one or the other.");
-        else if(tags.containsKey("io"))
-            numIOThreads = Integer.parseInt(tags.getValue("io"));
-        else if(argCollection.numberOfIOThreads != null)
-            numIOThreads = argCollection.numberOfIOThreads;
-
-        this.threadAllocation = new ThreadAllocation(argCollection.numberOfThreads, numCPUThreads, numIOThreads, ! argCollection.disableEfficiencyMonitor);
+        this.threadAllocation = new ThreadAllocation(argCollection.numberOfDataThreads,
+                argCollection.numberOfCPUThreadsPerDataThread,
+                argCollection.numberOfIOThreads,
+                ! argCollection.disableEfficiencyMonitor);
    }

+    public int getTotalNumberOfThreads() {
+        return this.threadAllocation == null ? 1 : threadAllocation.getTotalNumThreads();
+    }
+
+
+
    /**
     * Allow subclasses and others within this package direct access to the walker manager.
     * @return The walker manager used by this package.
@ -445,14 +442,18 @@ public class GenomeAnalysisEngine {

    protected DownsamplingMethod getDownsamplingMethod() {
        GATKArgumentCollection argCollection = this.getArguments();
-        DownsamplingMethod method;
-        if(argCollection.getDownsamplingMethod() != null)
-            method = argCollection.getDownsamplingMethod();
-        else if(WalkerManager.getDownsamplingMethod(walker) != null)
-            method = WalkerManager.getDownsamplingMethod(walker);
-        else
-            method = GATKArgumentCollection.getDefaultDownsamplingMethod();
-        return method;
+        boolean useExperimentalDownsampling = argCollection.enableExperimentalDownsampling;
+
+        // until the file pointer bug with the experimental downsamplers is fixed, disallow running with experimental downsampling
+        if ( useExperimentalDownsampling ) {
+            throw new UserException("The experimental downsampling implementation is currently crippled by a file-pointer-related bug. Until this bug is fixed, it's not safe (or possible) for anyone to use the experimental implementation!");
+        }
+
+        DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
+        DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useExperimentalDownsampling);
+        DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useExperimentalDownsampling);
+
+        return commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
    }

    protected void setDownsamplingMethod(DownsamplingMethod method) {
@ -825,11 +826,13 @@ public class GenomeAnalysisEngine {
     * @return A data source for the given set of reads.
     */
    private SAMDataSource createReadsDataSource(GATKArgumentCollection argCollection, GenomeLocParser genomeLocParser, IndexedFastaSequenceFile refReader) {
-        DownsamplingMethod method = getDownsamplingMethod();
+        DownsamplingMethod downsamplingMethod = getDownsamplingMethod();

        // Synchronize the method back into the collection so that it shows up when
        // interrogating for the downsample method during command line recreation.
-        setDownsamplingMethod(method);
+        setDownsamplingMethod(downsamplingMethod);
+
+        logger.info(downsamplingMethod);

        if (argCollection.removeProgramRecords && argCollection.keepProgramRecords)
            throw new UserException.BadArgumentValue("rpr / kpr", "Cannot enable both options");
@ -847,7 +850,7 @@ public class GenomeAnalysisEngine {
                argCollection.useOriginalBaseQualities,
                argCollection.strictnessLevel,
                argCollection.readBufferSize,
-                method,
+                downsamplingMethod,
                new ValidationExclusion(Arrays.asList(argCollection.unsafe)),
                filters,
                readTransformers,
@ -1035,7 +1038,10 @@ public class GenomeAnalysisEngine {
     *         owned by the caller; the caller can do with the object what they wish.
     */
    public ReadMetrics getCumulativeMetrics() {
-        return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
+        // todo -- probably shouldn't be lazy
+        if ( cumulativeMetrics == null )
+            cumulativeMetrics = readsDataSource == null ? new ReadMetrics() : readsDataSource.getCumulativeReadMetrics();
+        return cumulativeMetrics;
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java
@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk;
 import net.sf.picard.filter.SamRecordFilter;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.TreeMap;
@ -119,11 +118,18 @@ public class ReadMetrics implements Cloneable {
        return nRecords;
    }

+    /**
+     * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed.
+     */
+    public void incrementNumIterations(final long by) {
+        nRecords += by;
+    }
+
    /**
     * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed.
     */
    public void incrementNumIterations() {
-        nRecords++;
+        incrementNumIterations(1);
    }

    public long getNumReadsSeen() {
--- a/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/ReadProperties.java
@ -4,6 +4,7 @@ import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMFileReader;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;

--- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@ -27,6 +27,8 @@ package org.broadinstitute.sting.gatk;

 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.filters.FilterManager;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
@ -304,9 +306,10 @@ public class WalkerManager extends PluginManager<Walker> {
     * downsampling method is specified on the command-line, the command-line version will
     * be used instead.
     * @param walkerClass The class of the walker to interrogate.
+     * @param useExperimentalDownsampling If true, use the experimental downsampling implementation
     * @return The downsampling method, as specified by the walker.  Null if none exists.
     */
-    public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass) {
+    public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useExperimentalDownsampling) {
        DownsamplingMethod downsamplingMethod = null;

        if( walkerClass.isAnnotationPresent(Downsample.class) ) {
@ -314,7 +317,7 @@ public class WalkerManager extends PluginManager<Walker> {
            DownsampleType type = downsampleParameters.by();
            Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
            Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
-            downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction);
+            downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useExperimentalDownsampling);
        }

        return downsamplingMethod;
@ -333,10 +336,11 @@ public class WalkerManager extends PluginManager<Walker> {
     * downsampling method is specified on the command-line, the command-line version will
     * be used instead.
     * @param walker The walker to interrogate.
+     * @param useExperimentalDownsampling If true, use the experimental downsampling implementation
     * @return The downsampling method, as specified by the walker.  Null if none exists.
     */
-    public static DownsamplingMethod getDownsamplingMethod(Walker walker) {
-        return getDownsamplingMethod(walker.getClass());
+    public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useExperimentalDownsampling) {
+        return getDownsamplingMethod(walker.getClass(), useExperimentalDownsampling);
    }

    /**
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@ -31,8 +31,8 @@ import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.commandline.IntervalBinding;
-import org.broadinstitute.sting.gatk.DownsampleType;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.phonehome.GATKRunReport;
 import org.broadinstitute.sting.gatk.samples.PedigreeValidationType;
 import org.broadinstitute.sting.utils.QualityUtils;
@ -41,7 +41,9 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;

 import java.io.File;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;

 /**
 * @author aaron
@ -138,15 +140,11 @@ public class GATKArgumentCollection {
    @Argument(fullName = "nonDeterministicRandomSeed", shortName = "ndrs", doc = "Makes the GATK behave non deterministically, that is, the random numbers generated will be different in every run", required = false)
    public boolean nonDeterministicRandomSeed = false;

-    /**
-     * The override mechanism in the GATK, by default, populates the command-line arguments, then
-     * the defaults from the walker annotations.  Unfortunately, walker annotations should be trumped
-     * by a user explicitly specifying command-line arguments.
-     * TODO: Change the GATK so that walker defaults are loaded first, then command-line arguments.
-     */
-    private static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
-    private static int DEFAULT_DOWNSAMPLING_COVERAGE = 1000;
-
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // Downsampling Arguments
+    //
+    // --------------------------------------------------------------------------------------------------------------
    @Argument(fullName = "downsampling_type", shortName="dt", doc="Type of reads downsampling to employ at a given locus.  Reads will be selected randomly to be removed from the pile based on the method described here", required = false)
    public DownsampleType downsamplingType = null;

@ -156,17 +154,20 @@ public class GATKArgumentCollection {
    @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
    public Integer downsampleCoverage = null;

+    @Argument(fullName = "enable_experimental_downsampling", shortName = "enable_experimental_downsampling", doc = "Enable experimental engine-level downsampling", required = false)
+    @Hidden
+    public boolean enableExperimentalDownsampling = false;
+
    /**
     * Gets the downsampling method explicitly specified by the user.  If the user didn't specify
     * a default downsampling mechanism, return the default.
     * @return The explicitly specified downsampling mechanism, or the default if none exists.
     */
    public DownsamplingMethod getDownsamplingMethod() {
-        if(downsamplingType == null && downsampleFraction == null && downsampleCoverage == null)
+        if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
            return null;
-        if(downsamplingType == null && downsampleCoverage != null)
-            return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,downsampleCoverage,null);
-        return new DownsamplingMethod(downsamplingType,downsampleCoverage,downsampleFraction);
+
+        return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, enableExperimentalDownsampling);
    }

    /**
@ -176,9 +177,11 @@ public class GATKArgumentCollection {
    public void setDownsamplingMethod(DownsamplingMethod method) {
        if (method == null)
            throw new IllegalArgumentException("method is null");
+
        downsamplingType = method.type;
        downsampleCoverage = method.toCoverage;
        downsampleFraction = method.toFraction;
+        enableExperimentalDownsampling = method.useExperimentalDownsampling;
    }

    // --------------------------------------------------------------------------------------------------------------
@ -197,17 +200,14 @@ public class GATKArgumentCollection {
    // performance log arguments
    //
    // --------------------------------------------------------------------------------------------------------------
-    @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
-    public File performanceLog = null;

    /**
-     * Gets the default downsampling method, returned if the user didn't specify any downsampling
-     * method.
-     * @return The default downsampling mechanism, or null if none exists.
+     * The file name for the GATK performance log output, or null if you don't want to generate the
+     * detailed performance logging table.  This table is suitable for importing into R or any
+     * other analysis software that can read tsv files
     */
-    public static DownsamplingMethod getDefaultDownsamplingMethod() {
-        return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE,DEFAULT_DOWNSAMPLING_COVERAGE,null);
-    }
+    @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
+    public File performanceLog = null;

    @Argument(fullName="useOriginalQualities", shortName = "OQ", doc = "If set, use the original base quality scores from the OQ tag when present instead of the standard scores", required=false)
    public Boolean useOriginalBaseQualities = false;
@ -279,9 +279,32 @@ public class GATKArgumentCollection {
    @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.", required = false)
    public ValidationExclusion.TYPE unsafe;

-    /** How many threads should be allocated to this analysis. */
-    @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
-    public Integer numberOfThreads = 1;
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // Multi-threading arguments
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    /**
+     * How many data threads should be allocated to this analysis?  Data threads contains N cpu threads per
+     * data thread, and act as completely data parallel processing, increasing the memory usage of GATK
+     * by M data threads.  Data threads generally scale extremely effectively, up to 24 cores
+     */
+    @Argument(fullName = "num_threads", shortName = "nt", doc = "How many data threads should be allocated to running this analysis.", required = false)
+    public Integer numberOfDataThreads = 1;
+
+    /**
+     * How many CPU threads should be allocated per data thread?  Each CPU thread operates the map
+     * cycle independently, but may run into earlier scaling problems with IO than data threads.  Has
+     * the benefit of not requiring X times as much memory per thread as data threads do, but rather
+     * only a constant overhead.
+     */
+    @Argument(fullName="num_cpu_threads_per_data_thread", shortName = "nct", doc="How many CPU threads should be allocated per data thread to running this analysis?", required = false)
+    public int numberOfCPUThreadsPerDataThread = 1;
+
+    @Argument(fullName="num_io_threads", shortName = "nit", doc="How many of the given threads should be allocated to IO", required = false)
+    @Hidden
+    public int numberOfIOThreads = 0;

    /**
     * By default the GATK monitors its own efficiency, but this can have a itsy-bitsy tiny
@ -291,17 +314,6 @@ public class GATKArgumentCollection {
    @Argument(fullName = "disableThreadEfficiencyMonitor", shortName = "dtem", doc = "Disable GATK efficiency monitoring", required = false)
    public Boolean disableEfficiencyMonitor = false;

-    /**
-     * The following two arguments (num_cpu_threads, num_io_threads are TEMPORARY since Queue cannot currently support arbitrary tagged data types.
-     * TODO: Kill this when I can do a tagged integer in Queue.
-     */
-    @Argument(fullName="num_cpu_threads", shortName = "nct", doc="How many of the given threads should be allocated to the CPU", required = false)
-    @Hidden
-    public Integer numberOfCPUThreads = null;
-    @Argument(fullName="num_io_threads", shortName = "nit", doc="How many of the given threads should be allocated to IO", required = false)
-    @Hidden
-    public Integer numberOfIOThreads = null;
-
    @Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc="The total number of BAM file handles to keep open simultaneously", required=false)
    public Integer numberOfBAMFileHandles = null;

--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
@ -1,6 +1,6 @@
 package org.broadinstitute.sting.gatk.datasources.providers;

-import org.broadinstitute.sting.gatk.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.iterators.LocusIterator;
@ -135,8 +135,13 @@ public abstract class LocusView extends LocusIterator implements View {

        // Cache the current and apply filtering.
        AlignmentContext current = nextLocus;
-        if( sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null )
+
+        // The old ALL_READS downsampling implementation -- only use if we're not using the new experimental downsampling:
+        if( ! sourceInfo.getDownsamplingMethod().useExperimentalDownsampling &&
+            sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null ) {
+
            current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
+        }

        // Indicate that the next operation will need to advance.
        nextLocus = null;
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@ -30,7 +30,9 @@ import net.sf.samtools.*;
 import net.sf.samtools.util.CloseableIterator;
 import net.sf.samtools.util.RuntimeIOException;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.downsampling.*;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadMetrics;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -152,6 +154,8 @@ public class SAMDataSource {
     */
    private final ThreadAllocation threadAllocation;

+    private final boolean expandShardsForDownsampling;
+
    /**
     * Create a new SAM data source given the supplied read metadata.
     * @param samFiles list of reads files.
@ -302,6 +306,11 @@ public class SAMDataSource {
                includeReadsWithDeletionAtLoci,
                defaultBaseQualities);

+        expandShardsForDownsampling = readProperties.getDownsamplingMethod() != null &&
+                                      readProperties.getDownsamplingMethod().useExperimentalDownsampling &&
+                                      readProperties.getDownsamplingMethod().type != DownsampleType.NONE &&
+                                      readProperties.getDownsamplingMethod().toCoverage != null;
+
        // cache the read group id (original) -> read group id (merged)
        // and read group id (merged) -> read group id (original) mappings.
        for(SAMReaderID id: readerIDs) {
@ -457,6 +466,16 @@ public class SAMDataSource {
        }
    }

+    /**
+     * Are we expanding shards as necessary to prevent shard boundaries from occurring at improper places?
+     *
+     * @return true if we are using expanded shards, otherwise false
+     */
+    public boolean usingExpandedShards() {
+        return expandShardsForDownsampling;
+    }
+
+
    /**
     * Fill the given buffering shard with reads.
     * @param shard Shard to fill.
@ -484,6 +503,31 @@ public class SAMDataSource {
            }
        }

+        // If the reads are sorted in coordinate order, ensure that all reads
+        // having the same alignment start become part of the same shard, to allow
+        // downsampling to work better across shard boundaries. Note that because our
+        // read stream has already been fed through the positional downsampler, which
+        // ensures that at each alignment start position there are no more than dcov
+        // reads, we're in no danger of accidentally creating a disproportionately huge
+        // shard
+        if ( expandShardsForDownsampling && sortOrder == SAMFileHeader.SortOrder.coordinate ) {
+            while ( iterator.hasNext() ) {
+                SAMRecord additionalRead = iterator.next();
+
+                // Stop filling the shard as soon as we encounter a read having a different
+                // alignment start or contig from the last read added in the earlier loop
+                // above, or an unmapped read
+                if ( read == null ||
+                     additionalRead.getReadUnmappedFlag() ||
+                     ! additionalRead.getReferenceIndex().equals(read.getReferenceIndex()) ||
+                     additionalRead.getAlignmentStart() != read.getAlignmentStart() ) {
+                    break;
+                }
+                shard.addRead(additionalRead);
+                noteFilePositionUpdate(positionUpdates, additionalRead);
+            }
+        }
+
        // If the reads are sorted in queryname order, ensure that all reads
        // having the same queryname become part of the same shard.
        if(sortOrder == SAMFileHeader.SortOrder.queryname) {
@ -578,6 +622,7 @@ public class SAMDataSource {
            iterator = new MalformedBAMErrorReformatingIterator(id.samFile, iterator);
            if(shard.getGenomeLocs().size() > 0)
                iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
+
            iteratorMap.put(readers.getReader(id), iterator);
        }

@ -660,20 +705,25 @@ public class SAMDataSource {
                                                        List<ReadTransformer> readTransformers,
                                                        byte defaultBaseQualities) {

-        // *********************************************************************************** //
-        // *  NOTE: ALL FILTERING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
-        // *     (otherwise we will process something that we may end up throwing away)      * //
-        // *********************************************************************************** //
+        // ************************************************************************************************ //
+        // *  NOTE: ALL FILTERING/DOWNSAMPLING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
+        // *     (otherwise we will process something that we may end up throwing away)                   * //
+        // ************************************************************************************************ //

-        if (downsamplingFraction != null)
-            wrappedIterator = new DownsampleIterator(wrappedIterator, downsamplingFraction);
+        wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
+
+        if ( readProperties.getDownsamplingMethod().useExperimentalDownsampling ) {
+            wrappedIterator = applyDownsamplingIterator(wrappedIterator);
+        }
+
+        // Use the old fractional downsampler only if we're not using experimental downsampling:
+        if ( ! readProperties.getDownsamplingMethod().useExperimentalDownsampling && downsamplingFraction != null )
+            wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);

        // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
        // verify the read ordering by applying a sort order iterator
        if (!noValidationOfReadOrder && enableVerification)
-            wrappedIterator = new VerifyingSamIterator(genomeLocParser,wrappedIterator);
-
-        wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
+            wrappedIterator = new VerifyingSamIterator(wrappedIterator);

        if (useOriginalBaseQualities || defaultBaseQualities >= 0)
            // only wrap if we are replacing the original qualities or using a default base quality
@ -688,6 +738,26 @@ public class SAMDataSource {
        return wrappedIterator;
    }

+    protected StingSAMIterator applyDownsamplingIterator( StingSAMIterator wrappedIterator ) {
+        if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
+            ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = readProperties.getDownsamplingMethod().toCoverage != null ?
+                                                                    new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
+                                                                    new FractionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
+
+            return new PerSampleDownsamplingReadsIterator(wrappedIterator, downsamplerFactory);
+        }
+        else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
+            ReadsDownsampler<SAMRecord> downsampler = readProperties.getDownsamplingMethod().toCoverage != null ?
+                                                      new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
+                                                      new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
+
+            return new DownsamplingReadsIterator(wrappedIterator, downsampler);
+        }
+
+        return wrappedIterator;
+    }
+
+
    private class SAMResourcePool {
        /**
         * How many entries can be cached in this resource pool?
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsampleType.java
@ -1,4 +1,4 @@
-package org.broadinstitute.sting.gatk;
+package org.broadinstitute.sting.gatk.downsampling;

 /**
 * Type of downsampling method to invoke.
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/Downsampler.java
@ -28,49 +28,92 @@ import java.util.Collection;
 import java.util.List;

 /**
- * The basic downsampler API, with no reads-specific operations
+ * The basic downsampler API, with no reads-specific operations.
+ *
+ * Downsamplers that extend this interface rather than the ReadsDownsampler interface can handle
+ * any kind of item, however they cannot be wrapped within a DownsamplingReadsIterator or a
+ * PerSampleDownsamplingReadsIterator.
 *
 * @author David Roazen
 */
 public interface Downsampler<T> {

-    /*
-     * Submit one item to the downsampler for consideration . Some downsamplers will be able to determine
+    /**
+     * Submit one item to the downsampler for consideration. Some downsamplers will be able to determine
     * immediately whether the item survives the downsampling process, while others will need to see
     * more items before making that determination.
+     *
+     * @param item the individual item to submit to the downsampler for consideration
     */
    public void submit( T item );

-    /*
-     * Submit a collection of items to the downsampler for consideration.
+    /**
+     * Submit a collection of items to the downsampler for consideration. Should be equivalent to calling
+     * submit() on each individual item in the collection.
+     *
+     * @param items the collection of items to submit to the downsampler for consideration
     */
    public void submit( Collection<T> items );

-    /*
+    /**
     * Are there items that have survived the downsampling process waiting to be retrieved?
+     *
+     * @return true if this downsampler has > 0 finalized items, otherwise false
     */
-    public boolean hasDownsampledItems();
+    public boolean hasFinalizedItems();

-    /*
-     * Return (and remove) all items that have survived downsampling and are waiting to be retrieved.
+    /**
+     * Return (and *remove*) all items that have survived downsampling and are waiting to be retrieved.
+     *
+     * @return a list of all finalized items this downsampler contains, or an empty list if there are none
     */
-    public List<T> consumeDownsampledItems();
+    public List<T> consumeFinalizedItems();

-    /*
+    /**
     * Are there items stored in this downsampler that it doesn't yet know whether they will
     * ultimately survive the downsampling process?
+     *
+     * @return true if this downsampler has > 0 pending items, otherwise false
     */
    public boolean hasPendingItems();

-    /*
+    /**
+     * Peek at the first finalized item stored in this downsampler (or null if there are no finalized items)
+     *
+     * @return the first finalized item in this downsampler (the item is not removed from the downsampler by this call),
+     *         or null if there are none
+     */
+    public T peekFinalized();
+
+    /**
+     * Peek at the first pending item stored in this downsampler (or null if there are no pending items)
+     *
+     * @return the first pending item stored in this downsampler (the item is not removed from the downsampler by this call),
+     *         or null if there are none
+     */
+    public T peekPending();
+
+    /**
+     * Returns the number of items discarded (so far) during the downsampling process
+     *
+     * @return the number of items that have been submitted to this downsampler and discarded in the process of
+     *         downsampling
+     */
+    public int getNumberOfDiscardedItems();
+
+    /**
     * Used to tell the downsampler that no more items will be submitted to it, and that it should
     * finalize any pending items.
     */
    public void signalEndOfInput();

-    /*
-     * Reset the downsampler to a clean state, devoid of any pending/downsampled items or tracked state
-     * information.
+    /**
+     * Empty the downsampler of all finalized/pending items
     */
    public void clear();
+
+    /**
+     * Reset stats in the downsampler such as the number of discarded items *without* clearing the downsampler of items
+     */
+    public void reset();
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java
@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+/**
+ * Describes the method for downsampling reads at a given locus.
+ */
+
+public class DownsamplingMethod {
+    /**
+     * Type of downsampling to perform.
+     */
+    public final DownsampleType type;
+
+    /**
+     * Actual downsampling target is specified as an integer number of reads.
+     */
+    public final Integer toCoverage;
+
+    /**
+     * Actual downsampling target is specified as a fraction of total available reads.
+     */
+    public final Double toFraction;
+
+    /**
+     * Use the new experimental downsampling?
+     */
+    public final boolean useExperimentalDownsampling;
+
+    /**
+     * Expresses no downsampling applied at all.
+     */
+    public static final DownsamplingMethod NONE = new DownsamplingMethod(DownsampleType.NONE,null,null,false);
+
+    /**
+     * Default type to use if no type is specified
+     */
+    public static DownsampleType DEFAULT_DOWNSAMPLING_TYPE = DownsampleType.BY_SAMPLE;
+
+    /**
+     * Default target coverage for locus-based traversals
+     */
+    public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
+
+    public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
+        this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
+        this.toCoverage = toCoverage;
+        this.toFraction = toFraction;
+        this.useExperimentalDownsampling = useExperimentalDownsampling;
+
+        if ( type == DownsampleType.NONE ) {
+            toCoverage = null;
+            toFraction = null;
+        }
+
+        validate();
+    }
+
+    private void validate() {
+        // Can't leave toFraction and toCoverage null unless type is NONE
+        if ( type != DownsampleType.NONE && toFraction == null && toCoverage == null )
+            throw new UserException.CommandLineException("Must specify either toFraction or toCoverage when downsampling.");
+
+        // Fraction and coverage cannot both be specified.
+        if ( toFraction != null && toCoverage != null )
+            throw new UserException.CommandLineException("Downsampling coverage and fraction are both specified.  Please choose only one.");
+
+        // toCoverage must be > 0 when specified
+        if ( toCoverage != null && toCoverage <= 0 ) {
+            throw new UserException.CommandLineException("toCoverage must be > 0 when downsampling to coverage");
+        }
+
+        // toFraction must be >= 0.0 and <= 1.0 when specified
+        if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
+            throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
+        }
+
+        // Some restrictions only exist for the old downsampling implementation:
+        if ( ! useExperimentalDownsampling ) {
+            // By sample downsampling does not work with a fraction of reads in the old downsampling implementation
+            if( type == DownsampleType.BY_SAMPLE && toFraction != null )
+                throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
+        }
+
+        // Some restrictions only exist for the new downsampling implementation:
+        if ( useExperimentalDownsampling ) {
+            if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
+                throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
+            }
+        }
+    }
+
+    public String toString() {
+        StringBuilder builder = new StringBuilder("Downsampling Settings: ");
+
+        if ( type == DownsampleType.NONE ) {
+            builder.append("No downsampling");
+        }
+        else {
+            builder.append(String.format("Method: %s ", type));
+
+            if ( toCoverage != null ) {
+                builder.append(String.format("Target Coverage: %d ", toCoverage));
+            }
+            else {
+                builder.append(String.format("Target Fraction: %.2f ", toFraction));
+            }
+
+            if ( useExperimentalDownsampling ) {
+                builder.append("Using Experimental Downsampling");
+            }
+        }
+
+        return builder.toString();
+    }
+
+    public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
+        if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
+            return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
+                                          null, useExperimentalDownsampling);
+        }
+        else {
+            return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingReadsIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingReadsIterator.java
@ -33,7 +33,8 @@ import java.util.NoSuchElementException;


 /**
- * StingSAMIterator wrapper around our generic reads downsampler interface
+ * StingSAMIterator wrapper around our generic reads downsampler interface. Converts the push-style
+ * downsampler interface to a pull model.
 *
 * @author David Roazen
 */
@ -42,35 +43,50 @@ public class DownsamplingReadsIterator implements StingSAMIterator {
    private StingSAMIterator nestedSAMIterator;
    private ReadsDownsampler<SAMRecord> downsampler;
    private Collection<SAMRecord> downsampledReadsCache;
-    private Iterator<SAMRecord> downsampledReadsCacheIterator;
+    private SAMRecord nextRead = null;
+    private Iterator<SAMRecord> downsampledReadsCacheIterator = null;

+    /**
+     * @param iter wrapped iterator from which this iterator will pull reads
+     * @param downsampler downsampler through which the reads will be fed
+     */
    public DownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsampler<SAMRecord> downsampler ) {
        nestedSAMIterator = iter;
        this.downsampler = downsampler;
-        fillDownsampledReadsCache();
+
+        advanceToNextRead();
    }

    public boolean hasNext() {
-        if ( downsampledReadsCacheIterator.hasNext() ) {
-            return true;
-        }
-        else if ( ! nestedSAMIterator.hasNext() || ! fillDownsampledReadsCache() ) {
-            return false;
-        }
-
-        return true;
+        return nextRead != null;
    }

    public SAMRecord next() {
-        if ( ! downsampledReadsCacheIterator.hasNext() && ! fillDownsampledReadsCache() ) {
+        if ( nextRead == null ) {
            throw new NoSuchElementException("next() called when there are no more items");
        }

-        return downsampledReadsCacheIterator.next();
+        SAMRecord toReturn = nextRead;
+        advanceToNextRead();
+
+        return toReturn;
+    }
+
+    private void advanceToNextRead() {
+        if ( ! readyToReleaseReads() && ! fillDownsampledReadsCache() ) {
+            nextRead = null;
+        }
+        else {
+            nextRead = downsampledReadsCacheIterator.next();
+        }
+    }
+
+    private boolean readyToReleaseReads() {
+        return downsampledReadsCacheIterator != null && downsampledReadsCacheIterator.hasNext();
    }

    private boolean fillDownsampledReadsCache() {
-        while ( nestedSAMIterator.hasNext() && ! downsampler.hasDownsampledItems() ) {
+        while ( nestedSAMIterator.hasNext() && ! downsampler.hasFinalizedItems() ) {
            downsampler.submit(nestedSAMIterator.next());
        }

@ -78,7 +94,8 @@ public class DownsamplingReadsIterator implements StingSAMIterator {
            downsampler.signalEndOfInput();
        }

-        downsampledReadsCache = downsampler.consumeDownsampledItems();
+        // use returned collection directly rather than make a copy, for speed
+        downsampledReadsCache = downsampler.consumeFinalizedItems();
        downsampledReadsCacheIterator = downsampledReadsCache.iterator();

        return downsampledReadsCacheIterator.hasNext();
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsampler.java
@ -33,7 +33,10 @@ import java.util.Collection;
 import java.util.List;

 /**
- * Fractional Downsampler: selects a specified fraction of the reads for inclusion
+ * Fractional Downsampler: selects a specified fraction of the reads for inclusion.
+ *
+ * Since the selection is done randomly, the actual fraction of reads retained may be slightly
+ * more or less than the requested fraction, depending on the total number of reads submitted.
 *
 * @author David Roazen
 */
@ -43,8 +46,16 @@ public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsamp

    private int cutoffForInclusion;

+    private int numDiscardedItems;
+
    private static final int RANDOM_POOL_SIZE = 10000;

+    /**
+     * Construct a FractionalDownsampler
+     *
+     * @param fraction Fraction of reads to preserve, between 0.0 (inclusive) and 1.0 (inclusive).
+     *                 Actual number of reads preserved may differ randomly.
+     */
    public FractionalDownsampler( double fraction ) {
        if ( fraction < 0.0 || fraction > 1.0 ) {
            throw new ReviewedStingException("Fraction of reads to include must be between 0.0 and 1.0, inclusive");
@ -52,12 +63,16 @@ public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsamp

        cutoffForInclusion = (int)(fraction * RANDOM_POOL_SIZE);
        clear();
+        reset();
    }

    public void submit( T newRead ) {
        if ( GenomeAnalysisEngine.getRandomGenerator().nextInt(10000) < cutoffForInclusion ) {
            selectedReads.add(newRead);
        }
+        else {
+            numDiscardedItems++;
+        }
    }

    public void submit( Collection<T> newReads ) {
@ -66,11 +81,12 @@ public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsamp
        }
    }

-    public boolean hasDownsampledItems() {
+    public boolean hasFinalizedItems() {
        return selectedReads.size() > 0;
    }

-    public List<T> consumeDownsampledItems() {
+    public List<T> consumeFinalizedItems() {
+        // pass by reference rather than make a copy, for speed
        List<T> downsampledItems = selectedReads;
        clear();
        return downsampledItems;
@ -80,6 +96,18 @@ public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsamp
        return false;
    }

+    public T peekFinalized() {
+        return selectedReads.isEmpty() ? null : selectedReads.get(0);
+    }
+
+    public T peekPending() {
+        return null;
+    }
+
+    public int getNumberOfDiscardedItems() {
+        return numDiscardedItems;
+    }
+
    public void signalEndOfInput() {
        // NO-OP
    }
@ -88,7 +116,15 @@ public class FractionalDownsampler<T extends SAMRecord> implements ReadsDownsamp
        selectedReads = new ArrayList<T>();
    }

+    public void reset() {
+        numDiscardedItems = 0;
+    }
+
    public boolean requiresCoordinateSortOrder() {
        return false;
    }
+
+    public void signalNoMoreReadsBefore( T read ) {
+        // NO-OP
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerFactory.java
@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Factory for creating FractionalDownsamplers on demand
+ *
+ * @author David Roazen
+ */
+public class FractionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
+
+    private double fraction;
+
+    public FractionalDownsamplerFactory( double fraction ) {
+        this.fraction = fraction;
+    }
+
+    public ReadsDownsampler<T> newInstance() {
+        return new FractionalDownsampler<T>(fraction);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/LevelingDownsampler.java
@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import org.broadinstitute.sting.utils.MathUtils;
+
+import java.util.*;
+
+/**
+ * Leveling Downsampler: Given a set of Lists of arbitrary items and a target size, removes items from
+ * the Lists in an even fashion until the total size of all Lists is <= the target size. Leveling
+ * does not occur until all Lists have been submitted and signalEndOfInput() is called.
+ *
+ * The Lists should be LinkedLists for maximum efficiency during item removal, however other
+ * kinds of Lists are also accepted (albeit at a slight performance penalty).
+ *
+ * Since this downsampler extends the Downsampler interface rather than the ReadsDownsampler interface,
+ * the Lists need not contain reads. However this downsampler may not be wrapped within one of the
+ * DownsamplingReadsIterators
+ *
+ * @param <T> the List type representing the stacks to be leveled
+ * @param <E> the type of the elements of each List
+ *
+ * @author David Roazen
+ */
+public class LevelingDownsampler<T extends List<E>, E> implements Downsampler<T> {
+
+    private int targetSize;
+
+    private List<T> groups;
+
+    private boolean groupsAreFinalized;
+
+    private int numDiscardedItems;
+
+    /**
+     * Construct a LevelingDownsampler
+     *
+     * @param targetSize the sum of the sizes of all individual Lists this downsampler is fed may not exceed
+     *                   this value -- if it does, items are removed from Lists evenly until the total size
+     *                   is <= this value
+     */
+    public LevelingDownsampler( int targetSize ) {
+        this.targetSize = targetSize;
+        clear();
+        reset();
+    }
+
+    public void submit( T item ) {
+        groups.add(item);
+    }
+
+    public void submit( Collection<T> items ){
+        groups.addAll(items);
+    }
+
+    public boolean hasFinalizedItems() {
+        return groupsAreFinalized && groups.size() > 0;
+    }
+
+    public List<T> consumeFinalizedItems() {
+        if ( ! hasFinalizedItems() ) {
+            return new ArrayList<T>();
+        }
+
+        // pass by reference rather than make a copy, for speed
+        List<T> toReturn = groups;
+        clear();
+        return toReturn;
+    }
+
+    public boolean hasPendingItems() {
+        return ! groupsAreFinalized && groups.size() > 0;
+    }
+
+    public T peekFinalized() {
+        return hasFinalizedItems() ? groups.get(0) : null;
+    }
+
+    public T peekPending() {
+        return hasPendingItems() ? groups.get(0) : null;
+    }
+
+    public int getNumberOfDiscardedItems() {
+        return numDiscardedItems;
+    }
+
+    public void signalEndOfInput() {
+        levelGroups();
+        groupsAreFinalized = true;
+    }
+
+    public void clear() {
+        groups = new ArrayList<T>();
+        groupsAreFinalized = false;
+    }
+
+    public void reset() {
+        numDiscardedItems = 0;
+    }
+
+    private void levelGroups() {
+        int totalSize = 0;
+        int[] groupSizes = new int[groups.size()];
+        int currentGroupIndex = 0;
+
+        for ( T group : groups ) {
+            groupSizes[currentGroupIndex] = group.size();
+            totalSize += groupSizes[currentGroupIndex];
+            currentGroupIndex++;
+        }
+
+        if ( totalSize <= targetSize ) {
+            return;    // no need to eliminate any items
+        }
+
+        // We will try to remove exactly this many items, however we will refuse to allow any
+        // one group to fall below size 1, and so might end up removing fewer items than this
+        int numItemsToRemove = totalSize - targetSize;
+
+        currentGroupIndex = 0;
+        int numConsecutiveUmodifiableGroups = 0;
+
+        // Continue until we've either removed all the items we wanted to, or we can't
+        // remove any more items without violating the constraint that all groups must
+        // be left with at least one item
+        while ( numItemsToRemove > 0 && numConsecutiveUmodifiableGroups < groupSizes.length ) {
+            if ( groupSizes[currentGroupIndex] > 1 ) {
+                groupSizes[currentGroupIndex]--;
+                numItemsToRemove--;
+                numConsecutiveUmodifiableGroups = 0;
+            }
+            else {
+                numConsecutiveUmodifiableGroups++;
+            }
+
+            currentGroupIndex = (currentGroupIndex + 1) % groupSizes.length;
+        }
+
+        // Now we actually go through and reduce each group to its new count as specified in groupSizes
+        currentGroupIndex = 0;
+        for ( T group : groups ) {
+            downsampleOneGroup(group, groupSizes[currentGroupIndex]);
+            currentGroupIndex++;
+        }
+    }
+
+    private void downsampleOneGroup( T group, int numItemsToKeep ) {
+        if ( numItemsToKeep >= group.size() ) {
+            return;
+        }
+
+        numDiscardedItems += group.size() - numItemsToKeep;
+
+        BitSet itemsToKeep = new BitSet(group.size());
+        for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(group.size(), numItemsToKeep) ) {
+            itemsToKeep.set(selectedIndex);
+        }
+
+        int currentIndex = 0;
+
+        // If our group is a linked list, we can remove the desired items in a single O(n) pass with an iterator
+        if ( group instanceof LinkedList ) {
+            Iterator iter = group.iterator();
+            while ( iter.hasNext() ) {
+                iter.next();
+
+                if ( ! itemsToKeep.get(currentIndex) ) {
+                    iter.remove();
+                }
+
+                currentIndex++;
+            }
+        }
+        // If it's not a linked list, it's more efficient to copy the desired items into a new list and back rather
+        // than suffer O(n^2) of item shifting
+        else {
+            List<E> keptItems = new ArrayList<E>(numItemsToKeep);
+
+            for ( E item : group ) {
+                if ( itemsToKeep.get(currentIndex) ) {
+                    keptItems.add(item);
+                }
+                currentIndex++;
+            }
+            group.clear();
+            group.addAll(keptItems);
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIterator.java
@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMRecordComparator;
+import net.sf.samtools.SAMRecordCoordinateComparator;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+
+import java.util.*;
+
+
+/**
+ * StingSAMIterator wrapper around our generic reads downsampler interface
+ * that downsamples reads for each sample independently, and then re-assembles
+ * the reads back into a single merged stream.
+ *
+ * @author David Roazen
+ */
+public class PerSampleDownsamplingReadsIterator implements StingSAMIterator {
+
+    private StingSAMIterator nestedSAMIterator;
+    private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;
+    private Map<String, ReadsDownsampler<SAMRecord>> perSampleDownsamplers;
+    private PriorityQueue<SAMRecord> orderedDownsampledReadsCache;
+    private SAMRecord nextRead = null;
+    private SAMRecordComparator readComparator = new SAMRecordCoordinateComparator();
+    private SAMRecord earliestPendingRead = null;
+    private ReadsDownsampler<SAMRecord> earliestPendingDownsampler = null;
+
+    // Initial size of our cache of finalized reads
+    private static final int DOWNSAMPLED_READS_INITIAL_CACHE_SIZE = 4096;
+
+    // The number of positional changes that can occur in the read stream before all downsamplers
+    // should be informed of the current position (guards against samples with relatively sparse reads
+    // getting stuck in a pending state):
+    private static final int DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL = 3;   // TODO: experiment with this value
+
+    /**
+     * @param iter wrapped iterator from which this iterator will pull reads
+     * @param downsamplerFactory factory used to create new downsamplers as needed
+     */
+    public PerSampleDownsamplingReadsIterator( StingSAMIterator iter, ReadsDownsamplerFactory<SAMRecord> downsamplerFactory ) {
+        nestedSAMIterator = iter;
+        this.downsamplerFactory = downsamplerFactory;
+        perSampleDownsamplers = new HashMap<String, ReadsDownsampler<SAMRecord>>();
+        orderedDownsampledReadsCache = new PriorityQueue<SAMRecord>(DOWNSAMPLED_READS_INITIAL_CACHE_SIZE, readComparator);
+
+        advanceToNextRead();
+    }
+
+    public boolean hasNext() {
+        return nextRead != null;
+    }
+
+    public SAMRecord next() {
+        if ( nextRead == null ) {
+            throw new NoSuchElementException("next() called when there are no more items");
+        }
+
+        SAMRecord toReturn = nextRead;
+        advanceToNextRead();
+
+        return toReturn;
+    }
+
+    private void advanceToNextRead() {
+        if ( ! readyToReleaseReads() && ! fillDownsampledReadsCache() ) {
+            nextRead = null;
+        }
+        else {
+            nextRead = orderedDownsampledReadsCache.poll();
+        }
+    }
+
+    private boolean readyToReleaseReads() {
+        if ( orderedDownsampledReadsCache.isEmpty() ) {
+            return false;
+        }
+
+        return earliestPendingRead == null ||
+               readComparator.compare(orderedDownsampledReadsCache.peek(), earliestPendingRead) <= 0;
+    }
+
+    private void updateEarliestPendingRead( ReadsDownsampler<SAMRecord> currentDownsampler ) {
+        // If there is no recorded earliest pending read and this downsampler has pending items,
+        // then this downsampler's first pending item becomes the new earliest pending read:
+        if ( earliestPendingRead == null && currentDownsampler.hasPendingItems() ) {
+            earliestPendingRead = currentDownsampler.peekPending();
+            earliestPendingDownsampler = currentDownsampler;
+        }
+        // In all other cases, we only need to update the earliest pending read when the downsampler
+        // associated with it experiences a change in its pending reads, since by assuming a sorted
+        // read stream we're assured that each downsampler's earliest pending read will only increase
+        // in genomic position over time.
+        //
+        // TODO: An occasional O(samples) linear search seems like a better option than keeping the downsamplers
+        // TODO: sorted by earliest pending read, which would cost at least O(total_reads * (samples + log(samples))),
+        // TODO: but need to verify this empirically.
+        else if ( currentDownsampler == earliestPendingDownsampler &&
+                  (! currentDownsampler.hasPendingItems() || readComparator.compare(currentDownsampler.peekPending(), earliestPendingRead) != 0) ) {
+
+            earliestPendingRead = null;
+            earliestPendingDownsampler = null;
+            for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
+                if ( perSampleDownsampler.hasPendingItems() &&
+                     (earliestPendingRead == null || readComparator.compare(perSampleDownsampler.peekPending(), earliestPendingRead) < 0) ) {
+
+                    earliestPendingRead = perSampleDownsampler.peekPending();
+                    earliestPendingDownsampler = perSampleDownsampler;
+                }
+            }
+        }
+    }
+
+    private boolean fillDownsampledReadsCache() {
+        SAMRecord prevRead = null;
+        int numPositionalChanges = 0;
+
+        // Continue submitting reads to the per-sample downsamplers until the read at the top of the priority queue
+        // can be released without violating global sort order
+        while ( nestedSAMIterator.hasNext() && ! readyToReleaseReads() ) {
+            SAMRecord read = nestedSAMIterator.next();
+            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+
+            ReadsDownsampler<SAMRecord> thisSampleDownsampler = perSampleDownsamplers.get(sampleName);
+            if ( thisSampleDownsampler == null ) {
+                thisSampleDownsampler = downsamplerFactory.newInstance();
+                perSampleDownsamplers.put(sampleName, thisSampleDownsampler);
+            }
+
+            thisSampleDownsampler.submit(read);
+            updateEarliestPendingRead(thisSampleDownsampler);
+
+            if ( prevRead != null && prevRead.getAlignmentStart() != read.getAlignmentStart() ) {
+                numPositionalChanges++;
+            }
+
+            // If the number of times we've changed position exceeds a certain threshold, inform all
+            // downsamplers of the current position in the read stream. This is to prevent downsamplers
+            // for samples with sparser reads than others from getting stuck too long in a pending state.
+            if ( numPositionalChanges > DOWNSAMPLER_POSITIONAL_UPDATE_INTERVAL ) {
+                for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
+                    perSampleDownsampler.signalNoMoreReadsBefore(read);
+                    updateEarliestPendingRead(perSampleDownsampler);
+                }
+            }
+
+            prevRead = read;
+        }
+
+        if ( ! nestedSAMIterator.hasNext() ) {
+            for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
+                perSampleDownsampler.signalEndOfInput();
+            }
+            earliestPendingRead = null;
+            earliestPendingDownsampler = null;
+        }
+
+        for ( ReadsDownsampler<SAMRecord> perSampleDownsampler : perSampleDownsamplers.values() ) {
+            if ( perSampleDownsampler.hasFinalizedItems() ) {
+                orderedDownsampledReadsCache.addAll(perSampleDownsampler.consumeFinalizedItems());
+            }
+        }
+
+        return readyToReleaseReads();
+    }
+
+    public void remove() {
+        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
+    }
+
+    public void close() {
+        nestedSAMIterator.close();
+    }
+
+    public Iterator<SAMRecord> iterator() {
+        return this;
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PositionalDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PositionalDownsampler.java
@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.downsampling;
-
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-
-import java.util.*;
-
-/**
- * Positional Downsampler: When eliminating reads, try to do so evenly based on the alignment start positions
- *
- * @author David Roazen
- */
-public class PositionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
-
-    private int targetCoverage;
-
-    private ReservoirDownsampler<T> reservoir;
-
-    private int currentContigIndex;
-
-    private int currentAlignmentStart;
-
-    private LinkedList<PositionalReadGrouping> pendingReads;
-
-    private ArrayList<T> finalizedReads;
-
-    public PositionalDownsampler ( int targetCoverage ) {
-        this.targetCoverage = targetCoverage;
-        clear();
-    }
-
-    public void submit ( T newRead ) {
-        if ( readIsPastCurrentPosition(newRead) ) {
-            updateAndDownsamplePendingReads();
-        }
-
-        reservoir.submit(newRead);
-        updateCurrentPosition(newRead);
-    }
-
-    public void submit ( Collection<T> newReads ) {
-        for ( T read : newReads ) {
-            submit(read);
-        }
-    }
-
-    public boolean hasDownsampledItems() {
-        return finalizedReads.size() > 0;
-    }
-
-    public List<T> consumeDownsampledItems() {
-        List<T> toReturn = finalizedReads;
-        finalizedReads = new ArrayList<T>();
-        return toReturn;
-    }
-
-    public boolean hasPendingItems() {
-        return pendingReads.size() > 0;
-    }
-
-    public void signalEndOfInput() {
-        updateAndDownsamplePendingReads();
-
-        for ( PositionalReadGrouping group : pendingReads ) {
-            group.finalizeAllActiveReads();
-            finalizedReads.addAll(group.getFinalizedReads());
-        }
-
-        pendingReads.clear();
-    }
-
-    public void clear() {
-        reservoir = new ReservoirDownsampler<T>(targetCoverage);
-        pendingReads = new LinkedList<PositionalReadGrouping>();
-        finalizedReads = new ArrayList<T>();
-    }
-
-    public boolean requiresCoordinateSortOrder() {
-        return true;
-    }
-
-    private void updateCurrentPosition ( T read ) {
-        currentContigIndex = read.getReferenceIndex();
-        currentAlignmentStart = read.getAlignmentStart();
-    }
-
-    private boolean readIsPastCurrentPosition ( T read ) {
-        return read.getReferenceIndex() != currentContigIndex || read.getAlignmentStart() > currentAlignmentStart;
-    }
-
-    private void updateAndDownsamplePendingReads() {
-        finalizeOutOfScopeReads();
-
-        List<T> oldLocusReads = reservoir.consumeDownsampledItems();
-        pendingReads.add(new PositionalReadGrouping(oldLocusReads, currentContigIndex, currentAlignmentStart));
-
-        downsampleOverlappingGroups();
-    }
-
-    private void finalizeOutOfScopeReads() {
-        Iterator<PositionalReadGrouping> iter = pendingReads.iterator();
-        boolean noPrecedingUnfinalizedGroups = true;
-
-        while ( iter.hasNext() ) {
-            PositionalReadGrouping currentGroup = iter.next();
-            currentGroup.finalizeActiveReadsBeforePosition(currentContigIndex, currentAlignmentStart);
-
-            if ( currentGroup.isFinalized() && noPrecedingUnfinalizedGroups ) {
-                iter.remove();
-                finalizedReads.addAll(currentGroup.getFinalizedReads());
-            }
-            else {
-                noPrecedingUnfinalizedGroups = false;
-            }
-        }
-    }
-
-    private void downsampleOverlappingGroups() {
-        int[] groupReadCounts = new int[pendingReads.size()];
-        int totalCoverage = 0;
-        int numActiveGroups = 0;
-        int currentGroup = 0;
-
-        for ( PositionalReadGrouping group : pendingReads ) {
-            groupReadCounts[currentGroup] = group.numActiveReads();
-            totalCoverage += groupReadCounts[currentGroup];
-
-            if ( groupReadCounts[currentGroup] > 0 ) {
-                numActiveGroups++;
-            }
-
-            currentGroup++;
-        }
-
-        if ( totalCoverage <= targetCoverage ) {
-            return;
-        }
-
-        int numReadsToRemove = Math.min(totalCoverage - targetCoverage, totalCoverage - numActiveGroups);
-        currentGroup = 0;
-
-        while ( numReadsToRemove > 0  ) {
-            if ( groupReadCounts[currentGroup] > 1 ) {
-                groupReadCounts[currentGroup]--;
-                numReadsToRemove--;
-            }
-
-            currentGroup = (currentGroup + 1) % groupReadCounts.length;
-        }
-
-        currentGroup = 0;
-        for ( PositionalReadGrouping group : pendingReads ) {
-            if ( ! group.isFinalized() ) {
-                group.downsampleActiveReads(groupReadCounts[currentGroup]);
-            }
-            currentGroup++;
-        }
-    }
-
-    private class PositionalReadGrouping {
-        private List<T> activeReads;
-        private List<T> finalizedReads;
-
-        private int contig;
-        private int alignmentStart;
-
-        public PositionalReadGrouping( Collection<T> reads, int contig, int alignmentStart ) {
-            activeReads = new LinkedList<T>(reads);
-            finalizedReads = new ArrayList<T>();
-            this.contig = contig;
-            this.alignmentStart = alignmentStart;
-        }
-
-        public int numActiveReads() {
-            return activeReads.size();
-        }
-
-        public boolean isFinalized() {
-            return activeReads.size() == 0;
-        }
-
-        public List<T> getFinalizedReads() {
-            return finalizedReads;
-        }
-
-        public void finalizeActiveReadsBeforePosition( int contig, int position ) {
-            if ( this.contig != contig ) {
-                finalizeAllActiveReads();
-                return;
-            }
-
-            Iterator<T> iter = activeReads.iterator();
-
-            while ( iter.hasNext() ) {
-                T read = iter.next();
-                if ( read.getAlignmentEnd() < position ) {
-                    iter.remove();
-                    finalizedReads.add(read);
-                }
-            }
-        }
-
-        public void finalizeAllActiveReads() {
-            finalizedReads.addAll(activeReads);
-            activeReads.clear();
-        }
-
-        public void downsampleActiveReads( int numReadsToKeep ) {
-            if ( numReadsToKeep > activeReads.size() || numReadsToKeep < 0 ) {
-                throw new ReviewedStingException(String.format("Cannot retain %d reads out of %d total reads",
-                                                               numReadsToKeep, activeReads.size()));
-            }
-
-            BitSet itemsToKeep = new BitSet(activeReads.size());
-            for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(activeReads.size(), numReadsToKeep) ) {
-                itemsToKeep.set(selectedIndex);
-            }
-
-            int currentIndex = 0;
-            Iterator<T> iter = activeReads.iterator();
-
-            while ( iter.hasNext() ) {
-                T read = iter.next();
-
-                if ( ! itemsToKeep.get(currentIndex) ) {
-                    iter.remove();
-                }
-
-                currentIndex++;
-            }
-        }
-
-    }
-}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsampler.java
@ -33,8 +33,23 @@ import net.sf.samtools.SAMRecord;
 */
 public interface ReadsDownsampler<T extends SAMRecord> extends Downsampler<T> {

-    /*
+    /**
     * Does this downsampler require that reads be fed to it in coordinate order?
+     *
+     * @return true if reads must be submitted to this downsampler in coordinate order, otherwise false
     */
    public boolean requiresCoordinateSortOrder();
+
+    /**
+     * Tell this downsampler that no more reads located before the provided read (according to
+     * the sort order of the read stream) will be fed to it.
+     *
+     * Allows position-aware downsamplers to finalize pending reads earlier than they would
+     * otherwise be able to, particularly when doing per-sample downsampling and reads for
+     * certain samples are sparser than average.
+     *
+     * @param read the downsampler will assume that no reads located before this read will ever
+     *             be submitted to it in the future
+     */
+    public void signalNoMoreReadsBefore( T read );
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReadsDownsamplerFactory.java
@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+/**
+ * A ReadsDownsamplerFactory can be used to create an arbitrary number of instances of a particular
+ * downsampler, all sharing the same construction parameters.
+ *
+ * @author David Roazen
+ */
+public interface ReadsDownsamplerFactory<T extends SAMRecord> {
+    public ReadsDownsampler<T> newInstance();
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsampler.java
@ -48,6 +48,14 @@ public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampl

    private int totalReadsSeen;

+    private int numDiscardedItems;
+
+    /**
+     * Construct a ReservoirDownsampler
+     *
+     * @param targetSampleSize Size of the reservoir used by this downsampler. Number of items retained
+     *                         after downsampling will be min(totalReads, targetSampleSize)
+     */
    public ReservoirDownsampler ( int targetSampleSize ) {
        if ( targetSampleSize <= 0 ) {
            throw new ReviewedStingException("Cannot do reservoir downsampling with a sample size <= 0");
@ -55,6 +63,7 @@ public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampl

        this.targetSampleSize = targetSampleSize;
        clear();
+        reset();
    }

    public void submit ( T newRead ) {
@ -68,6 +77,7 @@ public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampl
            if ( randomSlot < targetSampleSize ) {
                reservoir.set(randomSlot, newRead);
            }
+            numDiscardedItems++;
        }
    }

@ -77,11 +87,12 @@ public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampl
        }
    }

-    public boolean hasDownsampledItems() {
+    public boolean hasFinalizedItems() {
        return reservoir.size() > 0;
    }

-    public List<T> consumeDownsampledItems() {
+    public List<T> consumeFinalizedItems() {
+        // pass by reference rather than make a copy, for speed
        List<T> downsampledItems = reservoir;
        clear();
        return downsampledItems;
@ -91,16 +102,36 @@ public class ReservoirDownsampler<T extends SAMRecord> implements ReadsDownsampl
        return false;
    }

+    public T peekFinalized() {
+        return reservoir.isEmpty() ? null : reservoir.get(0);
+    }
+
+    public T peekPending() {
+        return null;
+    }
+
+    public int getNumberOfDiscardedItems() {
+        return numDiscardedItems;
+    }
+
    public void signalEndOfInput() {
        // NO-OP
    }

    public void clear() {
        reservoir = new ArrayList<T>(targetSampleSize);
-        totalReadsSeen = 0;
+        totalReadsSeen = 0;    // an internal stat used by the downsampling process, so not cleared by reset() below
+    }
+
+    public void reset() {
+        numDiscardedItems = 0;
    }

    public boolean requiresCoordinateSortOrder() {
        return false;
    }
+
+    public void signalNoMoreReadsBefore( T read ) {
+        // NO-OP
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerFactory.java
@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Factory for creating ReservoirDownsamplers on demand
+ *
+ * @author David Roazen
+ */
+public class ReservoirDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
+
+    private int targetSampleSize;
+
+    public ReservoirDownsamplerFactory( int targetSampleSize ) {
+        this.targetSampleSize = targetSampleSize;
+    }
+
+    public ReadsDownsampler<T> newInstance() {
+        return new ReservoirDownsampler<T>(targetSampleSize);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsampler.java
@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+import java.util.*;
+
+/**
+ * Simple Positional Downsampler: Downsample each stack of reads at each alignment start to a size <= a target coverage
+ * using a Reservoir downsampler. Stores only O(target coverage) reads in memory at any given time.
+ *
+ * @author David Roazen
+ */
+public class SimplePositionalDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
+
+    private int targetCoverage;
+
+    private ReservoirDownsampler<T> reservoir;
+
+    private int currentContigIndex;
+
+    private int currentAlignmentStart;
+
+    private boolean positionEstablished;
+
+    private boolean unmappedReadsReached;
+
+    private ArrayList<T> finalizedReads;
+
+    private int numDiscardedItems;
+
+    /**
+     * Construct a SimplePositionalDownsampler
+     *
+     * @param targetCoverage Maximum number of reads that may share any given alignment start position
+     */
+    public SimplePositionalDownsampler( int targetCoverage ) {
+        this.targetCoverage = targetCoverage;
+        reservoir = new ReservoirDownsampler<T>(targetCoverage);
+        finalizedReads = new ArrayList<T>();
+        clear();
+        reset();
+    }
+
+    public void submit( T newRead ) {
+        updatePositionalState(newRead);
+
+        if ( unmappedReadsReached ) {    // don't downsample the unmapped reads at the end of the stream
+            finalizedReads.add(newRead);
+        }
+        else {
+            int reservoirPreviouslyDiscardedItems = reservoir.getNumberOfDiscardedItems();
+            reservoir.submit(newRead);
+            numDiscardedItems += reservoir.getNumberOfDiscardedItems() - reservoirPreviouslyDiscardedItems;
+        }
+    }
+
+    public void submit( Collection<T> newReads ) {
+        for ( T read : newReads ) {
+            submit(read);
+        }
+    }
+
+    public boolean hasFinalizedItems() {
+        return finalizedReads.size() > 0;
+    }
+
+    public List<T> consumeFinalizedItems() {
+        // pass by reference rather than make a copy, for speed
+        List<T> toReturn = finalizedReads;
+        finalizedReads = new ArrayList<T>();
+        return toReturn;
+    }
+
+    public boolean hasPendingItems() {
+        return reservoir.hasFinalizedItems();
+    }
+
+    public T peekFinalized() {
+        return finalizedReads.isEmpty() ? null : finalizedReads.get(0);
+    }
+
+    public T peekPending() {
+        return reservoir.peekFinalized();
+    }
+
+    public int getNumberOfDiscardedItems() {
+        return numDiscardedItems;
+    }
+
+    public void signalEndOfInput() {
+        finalizeReservoir();
+    }
+
+    public void clear() {
+        reservoir.clear();
+        reservoir.reset();
+        finalizedReads.clear();
+        positionEstablished = false;
+        unmappedReadsReached = false;
+    }
+
+    public void reset() {
+        numDiscardedItems = 0;
+    }
+
+    public boolean requiresCoordinateSortOrder() {
+        return true;
+    }
+
+    public void signalNoMoreReadsBefore( T read ) {
+        updatePositionalState(read);
+    }
+
+    private void updatePositionalState( T newRead ) {
+        if ( readIsPastCurrentPosition(newRead) ) {
+            if ( reservoir.hasFinalizedItems() ) {
+                finalizeReservoir();
+            }
+
+            setCurrentPosition(newRead);
+
+            if ( newRead.getReadUnmappedFlag() ) {
+                unmappedReadsReached = true;
+            }
+        }
+    }
+
+    private void setCurrentPosition( T read ) {
+        currentContigIndex = read.getReferenceIndex();
+        currentAlignmentStart = read.getAlignmentStart();
+        positionEstablished = true;
+    }
+
+    private boolean readIsPastCurrentPosition( T read ) {
+        return ! positionEstablished ||
+               read.getReferenceIndex() > currentContigIndex ||
+               read.getAlignmentStart() > currentAlignmentStart ||
+               (read.getReadUnmappedFlag() && ! unmappedReadsReached);
+    }
+
+    private void finalizeReservoir() {
+        finalizedReads.addAll(reservoir.consumeFinalizedItems());
+        reservoir.reset();
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerFactory.java
@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+/**
+ * Factory for creating SimplePositionalDownsamplers on demand
+ *
+ * @author David Roazen
+ */
+public class SimplePositionalDownsamplerFactory<T extends SAMRecord> implements ReadsDownsamplerFactory<T> {
+
+    private int targetCoverage;
+
+    public SimplePositionalDownsamplerFactory( int targetCoverage ) {
+        this.targetCoverage = targetCoverage;
+    }
+
+    public ReadsDownsampler<T> newInstance() {
+        return new SimplePositionalDownsampler<T>(targetCoverage);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
 import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
+import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -76,21 +77,21 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
    /**
     * Create a new hierarchical microscheduler to process the given reads and reference.
     *
-     * @param walker        the walker used to process the dataset.
-     * @param reads         Reads file(s) to process.
-     * @param reference     Reference for driving the traversal.
-     * @param nThreadsToUse maximum number of threads to use to do the work
+     * @param walker           the walker used to process the dataset.
+     * @param reads            Reads file(s) to process.
+     * @param reference        Reference for driving the traversal.
+     * @param threadAllocation How should we apply multi-threaded execution?
     */
    protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
                                         final Walker walker,
                                         final SAMDataSource reads,
                                         final IndexedFastaSequenceFile reference,
                                         final Collection<ReferenceOrderedDataSource> rods,
-                                         final int nThreadsToUse,
-                                         final boolean monitorThreadPerformance ) {
-        super(engine, walker, reads, reference, rods, nThreadsToUse);
+                                         final ThreadAllocation threadAllocation) {
+        super(engine, walker, reads, reference, rods, threadAllocation);

-        if ( monitorThreadPerformance ) {
+        final int nThreadsToUse = threadAllocation.getNumDataThreads();
+        if ( threadAllocation.monitorThreadEfficiency() ) {
            final EfficiencyMonitoringThreadFactory monitoringThreadFactory = new EfficiencyMonitoringThreadFactory(nThreadsToUse);
            setThreadEfficiencyMonitor(monitoringThreadFactory);
            this.threadPool = Executors.newFixedThreadPool(nThreadsToUse, monitoringThreadFactory);
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
+import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.SampleUtils;
@ -39,13 +40,11 @@ public class LinearMicroScheduler extends MicroScheduler {
                                   final SAMDataSource reads,
                                   final IndexedFastaSequenceFile reference,
                                   final Collection<ReferenceOrderedDataSource> rods,
-                                   final int numThreads, // may be > 1 if are nanoScheduling
-                                   final boolean monitorThreadPerformance ) {
-        super(engine, walker, reads, reference, rods, numThreads);
+                                   final ThreadAllocation threadAllocation) {
+        super(engine, walker, reads, reference, rods, threadAllocation);

-        if ( monitorThreadPerformance )
+        if ( threadAllocation.monitorThreadEfficiency() )
            setThreadEfficiencyMonitor(new ThreadEfficiencyMonitor());
-
    }

    /**
@ -60,11 +59,12 @@ public class LinearMicroScheduler extends MicroScheduler {

        boolean done = walker.isDone();
        int counter = 0;
+
+        traversalEngine.startTimersIfNecessary();
        for (Shard shard : shardStrategy ) {
            if ( done || shard == null ) // we ran out of shards that aren't owned
                break;

-            traversalEngine.startTimersIfNecessary();
            if(shard.getShardType() == Shard.ShardType.LOCUS) {
                WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
                        getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@ -59,6 +59,8 @@ import java.util.Collection;

 /** Shards and schedules data in manageable chunks. */
 public abstract class MicroScheduler implements MicroSchedulerMBean {
+    // TODO -- remove me and retire non nano scheduled versions of traversals
+    private final static boolean USE_NANOSCHEDULER_FOR_EVERYTHING = true;
    protected static final Logger logger = Logger.getLogger(MicroScheduler.class);

    /**
@ -100,27 +102,36 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
     * @return The best-fit microscheduler.
     */
    public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
-        if (threadAllocation.getNumCPUThreads() > 1) {
+        if ( threadAllocation.isRunningInParallelMode() ) {
+            // TODO -- remove me when we fix running NCT within HMS
+            if ( threadAllocation.getNumDataThreads() > 1 && threadAllocation.getNumCPUThreadsPerDataThread() > 1)
+                throw new UserException("Currently the GATK does not support running CPU threads within data threads, " +
+                        "please specify only one of NT and NCT");
+
+            logger.info(String.format("Running the GATK in parallel mode with %d CPU thread(s) for each of %d data thread(s)",
+                    threadAllocation.getNumCPUThreadsPerDataThread(), threadAllocation.getNumDataThreads()));
+        }
+
+        if ( threadAllocation.getNumDataThreads() > 1 ) {
            if (walker.isReduceByInterval())
                throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval.  Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution.  Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));

-            logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
-
-            if ( walker instanceof ReadWalker ) {
-                if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
-                return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+            if ( ! (walker instanceof TreeReducible) ) {
+                throw badNT("nt", engine, walker);
            } else {
-                // TODO -- update test for when nano scheduling only is an option
-                if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
-                return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+                return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
            }
        } else {
-            return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+            if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) )
+                throw badNT("nct", engine, walker);
+            return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
        }
    }

-    private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
-        throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution.  Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
+    private static UserException badNT(final String parallelArg, final GenomeAnalysisEngine engine, final Walker walker) {
+        throw new UserException.BadArgumentValue("nt",
+                String.format("The analysis %s currently does not support parallel execution with %s.  " +
+                        "Please run your analysis without the %s option.", engine.getWalkerName(walker.getClass()), parallelArg, parallelArg));
    }

    /**
@ -130,23 +141,27 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
     * @param reads   The reads.
     * @param reference The reference.
     * @param rods    the rods to include in the traversal
-     * @param numThreads the number of threads we are using in the underlying traversal
+     * @param threadAllocation the allocation of threads to use in the underlying traversal
     */
    protected MicroScheduler(final GenomeAnalysisEngine engine,
                             final Walker walker,
                             final SAMDataSource reads,
                             final IndexedFastaSequenceFile reference,
                             final Collection<ReferenceOrderedDataSource> rods,
-                             final int numThreads) {
+                             final ThreadAllocation threadAllocation) {
        this.engine = engine;
        this.reads = reads;
        this.reference = reference;
        this.rods = rods;

        if (walker instanceof ReadWalker) {
-            traversalEngine = numThreads > 1 ? new TraverseReadsNano(numThreads) : new TraverseReads();
+            traversalEngine = USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1
+                    ? new TraverseReadsNano(threadAllocation.getNumCPUThreadsPerDataThread())
+                    : new TraverseReads();
        } else if (walker instanceof LocusWalker) {
-            traversalEngine = new TraverseLoci();
+            traversalEngine = USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1
+                    ? new TraverseLociNano(threadAllocation.getNumCPUThreadsPerDataThread())
+                    : new TraverseLociLinear();
        } else if (walker instanceof DuplicateWalker) {
            traversalEngine = new TraverseDuplicates();
        } else if (walker instanceof ReadPairWalker) {
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.iterators.LocusIterator;
 import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
+import org.broadinstitute.sting.gatk.iterators.LocusIteratorByStateExperimental;
 import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@ -81,7 +82,13 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
    public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals, Collection<String> sampleNames) {
        this.sourceInfo = shard.getReadProperties();
        this.readIterator = iterator;
-        this.sourceIterator = new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
+
+        // Temporary: use the experimental version of LocusIteratorByState if experimental downsampling was requested:
+        this.sourceIterator = sourceInfo.getDownsamplingMethod().useExperimentalDownsampling ?
+                              new PeekableIterator<AlignmentContext>(new LocusIteratorByStateExperimental(iterator,sourceInfo,genomeLocParser, sampleNames))
+                              :
+                              new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
+
        this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/FilterManager.java
@ -29,6 +29,7 @@ import com.google.common.base.Function;
 import com.google.common.collect.Collections2;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.classloader.PluginManager;
+import org.broadinstitute.sting.utils.help.GATKDocUtils;

 import java.util.Collection;
 import java.util.List;
@ -68,16 +69,29 @@ public class FilterManager extends PluginManager<ReadFilter> {
    @Override
    protected String formatErrorMessage(String pluginCategory, String pluginName) {
        List<Class<? extends ReadFilter>> availableFilters = this.getPluginsImplementing(ReadFilter.class);
-        Collection<String> availableFilterNames = Collections2.transform(availableFilters, new Function<Class<? extends ReadFilter>,String>(){

-            @Override
-            public String apply(final Class<? extends ReadFilter> input) {
-                return getName(input);
-            }
-        });

-        return String.format("Read filter %s not found. Available read filters:%n%s.%n%n%s",pluginName,
-                Utils.join(String.format(", "),availableFilterNames),
+        return String.format("Read filter %s not found. Available read filters:%n%n%s%n%n%s",pluginName,
+                userFriendlyListofReadFilters(availableFilters),
                "Please consult the GATK Documentation (http://www.broadinstitute.org/gatk/gatkdocs/) for more information.");
    }
+
+    private String userFriendlyListofReadFilters(List<Class<? extends ReadFilter>> filters) {
+        final String headName = "FilterName", headDoc = "Documentation";
+        int longestNameLength = -1;
+        for ( Class < ? extends ReadFilter> filter : filters ) {
+            longestNameLength = Math.max(longestNameLength,this.getName(filter).length());
+        }
+        String format = "   %"+longestNameLength+"s        %s%n";
+
+        StringBuilder listBuilder = new StringBuilder();
+        listBuilder.append(String.format(format,headName,headDoc));
+        for ( Class<? extends ReadFilter> filter : filters ) {
+            String helpLink = GATKDocUtils.helpLinksToGATKDocs(filter);
+            String filterName = this.getName(filter);
+            listBuilder.append(String.format(format,filterName,helpLink));
+        }
+
+        return listBuilder.toString();
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
@ -32,9 +32,9 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.writer.Options;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;

 import java.io.File;
@ -269,7 +269,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
     * @return
     */
    public boolean alsoWriteBCFForTest() {
-        return engine.getArguments().numberOfThreads == 1 && // only works single threaded
+        return engine.getArguments().numberOfDataThreads == 1 && // only works single threaded
                ! isCompressed() && // for non-compressed outputs
                getFile() != null && // that are going to disk
                engine.getArguments().generateShadowBCF; // and we actually want to do it
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyDownsampleIterator.java
@ -6,13 +6,13 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import java.util.Iterator;


-public class DownsampleIterator implements StingSAMIterator {
+public class LegacyDownsampleIterator implements StingSAMIterator {

    StingSAMIterator it;
    int cutoff;
    SAMRecord next;

-    public DownsampleIterator(StingSAMIterator it, double fraction) {
+    public LegacyDownsampleIterator(StingSAMIterator it, double fraction) {
        this.it = it;
        cutoff = (int)(fraction * 10000);
        next = getNextRecord();
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@ -31,8 +31,8 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.DownsampleType;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.utils.GenomeLoc;
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java
@ -0,0 +1,649 @@
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.iterators;
+
+import net.sf.picard.util.PeekableIterator;
+import net.sf.samtools.Cigar;
+import net.sf.samtools.CigarElement;
+import net.sf.samtools.CigarOperator;
+import net.sf.samtools.SAMRecord;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.ReadProperties;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.Downsampler;
+import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
+
+import java.util.*;
+
+/**
+ * Iterator that traverses a SAM File, accumulating information on a per-locus basis
+ */
+public class LocusIteratorByStateExperimental extends LocusIterator {
+    /**
+     * our log, which we want to capture anything from this class
+     */
+    private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
+
+    // -----------------------------------------------------------------------------------------------------------------
+    //
+    // member fields
+    //
+    // -----------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Used to create new GenomeLocs.
+     */
+    private final GenomeLocParser genomeLocParser;
+    private final ArrayList<String> samples;
+    private final ReadStateManager readStates;
+
+    protected static class SAMRecordState {
+        SAMRecord read;
+        int readOffset = -1;                    // how far are we offset from the start of the read bases?
+        int genomeOffset = -1;                  // how far are we offset from the alignment start on the genome?
+
+        Cigar cigar = null;
+        int cigarOffset = -1;
+        CigarElement curElement = null;
+        int nCigarElements = 0;
+
+        int cigarElementCounter = -1;           // how far are we into a single cigarElement
+
+        // The logical model for generating extended events is as follows: the "record state" implements the traversal
+        // along the reference; thus stepForwardOnGenome() returns on every and only on actual reference bases. This
+        // can be a (mis)match or a deletion (in the latter case, we still return on every individual reference base the
+        // deletion spans). In the extended events mode, the record state also remembers if there was an insertion, or
+        // if the deletion just started *right before* the current reference base the record state is pointing to upon the return from
+        // stepForwardOnGenome(). The next call to stepForwardOnGenome() will clear that memory (as we remember only extended
+        // events immediately preceding the current reference base).
+
+        public SAMRecordState(SAMRecord read) {
+            this.read = read;
+            cigar = read.getCigar();
+            nCigarElements = cigar.numCigarElements();
+
+            //System.out.printf("Creating a SAMRecordState: %s%n", this);
+        }
+
+        public SAMRecord getRead() {
+            return read;
+        }
+
+        /**
+         * What is our current offset in the read's bases that aligns us with the reference genome?
+         *
+         * @return
+         */
+        public int getReadOffset() {
+            return readOffset;
+        }
+
+        /**
+         * What is the current offset w.r.t. the alignment state that aligns us to the readOffset?
+         *
+         * @return
+         */
+        public int getGenomeOffset() {
+            return genomeOffset;
+        }
+
+        public int getGenomePosition() {
+            return read.getAlignmentStart() + getGenomeOffset();
+        }
+
+        public GenomeLoc getLocation(GenomeLocParser genomeLocParser) {
+            return genomeLocParser.createGenomeLoc(read.getReferenceName(), getGenomePosition());
+        }
+
+        public CigarOperator getCurrentCigarOperator() {
+            return curElement.getOperator();
+        }
+
+        public String toString() {
+            return String.format("%s ro=%d go=%d co=%d cec=%d %s", read.getReadName(), readOffset, genomeOffset, cigarOffset, cigarElementCounter, curElement);
+        }
+
+        public CigarElement peekForwardOnGenome() {
+            return ( cigarElementCounter + 1 > curElement.getLength() && cigarOffset + 1 < nCigarElements ? cigar.getCigarElement(cigarOffset + 1) : curElement );
+        }
+
+        public CigarElement peekBackwardOnGenome() {
+            return ( cigarElementCounter - 1 == 0 && cigarOffset - 1 > 0 ? cigar.getCigarElement(cigarOffset - 1) : curElement );
+        }
+
+        
+        public CigarOperator stepForwardOnGenome() {
+            // we enter this method with readOffset = index of the last processed base on the read
+            // (-1 if we did not process a single base yet); this can be last matching base, or last base of an insertion
+
+
+            if (curElement == null || ++cigarElementCounter > curElement.getLength()) {
+                cigarOffset++;
+                if (cigarOffset < nCigarElements) {
+                    curElement = cigar.getCigarElement(cigarOffset);
+                    cigarElementCounter = 0;
+                    // next line: guards against cigar elements of length 0; when new cigar element is retrieved,
+                    // we reenter in order to re-check cigarElementCounter against curElement's length
+                    return stepForwardOnGenome();
+                } else {
+                    if (curElement != null && curElement.getOperator() == CigarOperator.D)
+                        throw new UserException.MalformedBAM(read, "read ends with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
+                        
+                    // Reads that contain indels model the genomeOffset as the following base in the reference.  Because
+                    // we fall into this else block only when indels end the read, increment genomeOffset  such that the
+                    // current offset of this read is the next ref base after the end of the indel.  This position will
+                    // model a point on the reference somewhere after the end of the read.
+                    genomeOffset++; // extended events need that. Logically, it's legal to advance the genomic offset here:
+                    // we do step forward on the ref, and by returning null we also indicate that we are past the read end.
+
+                    return null;
+                }
+            }
+
+            boolean done = false;
+            switch (curElement.getOperator()) {
+                case H: // ignore hard clips
+                case P: // ignore pads
+                    cigarElementCounter = curElement.getLength();
+                    break;
+                case I: // insertion w.r.t. the reference
+                case S: // soft clip
+                    cigarElementCounter = curElement.getLength();
+                    readOffset += curElement.getLength();
+                    break;
+                case D: // deletion w.r.t. the reference
+                    if (readOffset < 0)             // we don't want reads starting with deletion, this is a malformed cigar string
+                        throw new UserException.MalformedBAM(read, "read starts with deletion. Cigar: " + read.getCigarString() + ". Although the SAM spec technically permits such reads, this is often indicative of malformed files. If you are sure you want to use this file, re-run your analysis with the extra option: -rf BadCigar");
+                    // should be the same as N case
+                    genomeOffset++;
+                    done = true;
+                    break;
+                case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
+                    genomeOffset++;
+                    done = true;
+                    break;
+                case M:
+                case EQ:
+                case X:
+                    readOffset++;
+                    genomeOffset++;
+                    done = true;
+                    break;
+                default:
+                    throw new IllegalStateException("Case statement didn't deal with cigar op: " + curElement.getOperator());
+            }
+
+            return done ? curElement.getOperator() : stepForwardOnGenome();
+        }
+    }
+
+    //final boolean DEBUG = false;
+    //final boolean DEBUG2 = false && DEBUG;
+    private ReadProperties readInfo;
+    private AlignmentContext nextAlignmentContext;
+    private boolean performLevelingDownsampling;
+
+    // -----------------------------------------------------------------------------------------------------------------
+    //
+    // constructors and other basic operations
+    //
+    // -----------------------------------------------------------------------------------------------------------------
+
+    public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
+        this.readInfo = readInformation;
+        this.genomeLocParser = genomeLocParser;
+        this.samples = new ArrayList<String>(samples);
+        this.readStates = new ReadStateManager(samIterator);
+
+        this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
+                                           readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
+                                           readInfo.getDownsamplingMethod().toCoverage != null;
+
+        // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
+        // there's no read data.  So we need to throw this error only when samIterator.hasNext() is true
+        if (this.samples.isEmpty() && samIterator.hasNext()) {
+            throw new IllegalArgumentException("samples list must not be empty");
+        }
+    }
+
+    /**
+     * For testing only.  Assumes that the incoming SAMRecords have no read groups, so creates a dummy sample list
+     * for the system.
+     */
+    public final static Collection<String> sampleListForSAMWithoutReadGroups() {
+        List<String> samples = new ArrayList<String>();
+        samples.add(null);
+        return samples;
+    }
+
+    public Iterator<AlignmentContext> iterator() {
+        return this;
+    }
+
+    public void close() {
+        //this.it.close();
+    }
+
+    public boolean hasNext() {
+        lazyLoadNextAlignmentContext();
+        return (nextAlignmentContext != null);
+        //if ( DEBUG ) System.out.printf("hasNext() = %b%n", r);
+    }
+
+    private GenomeLoc getLocation() {
+        return readStates.isEmpty() ? null : readStates.getFirst().getLocation(genomeLocParser);
+    }
+
+    // -----------------------------------------------------------------------------------------------------------------
+    //
+    // next() routine and associated collection operations
+    //
+    // -----------------------------------------------------------------------------------------------------------------
+    public AlignmentContext next() {
+        lazyLoadNextAlignmentContext();
+        if (!hasNext())
+            throw new NoSuchElementException("LocusIteratorByState: out of elements.");
+        AlignmentContext currentAlignmentContext = nextAlignmentContext;
+        nextAlignmentContext = null;
+        return currentAlignmentContext;
+    }
+
+    /**
+     * Creates the next alignment context from the given state.  Note that this is implemented as a lazy load method.
+     * nextAlignmentContext MUST BE null in order for this method to advance to the next entry.
+     */
+    private void lazyLoadNextAlignmentContext() {
+        while (nextAlignmentContext == null && readStates.hasNext()) {
+            readStates.collectPendingReads();
+
+            final GenomeLoc location = getLocation();
+            final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
+
+            // TODO: How can you determine here whether the current pileup has been downsampled?
+            boolean hasBeenSampled = false;
+
+            for (final String sample : samples) {
+                final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
+                final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
+
+                int size = 0;                                                           // number of elements in this sample's pileup
+                int nDeletions = 0;                                                     // number of deletions in this sample's pileup
+                int nMQ0Reads = 0;                                                      // number of MQ0 reads in this sample's pileup (warning: current implementation includes N bases that are MQ0)
+
+                while (iterator.hasNext()) {
+                    final SAMRecordState state = iterator.next();                   // state object with the read/offset information
+                    final GATKSAMRecord read = (GATKSAMRecord) state.getRead();     // the actual read
+                    final CigarOperator op = state.getCurrentCigarOperator();       // current cigar operator
+                    final CigarElement nextElement = state.peekForwardOnGenome();   // next cigar element
+                    final CigarElement lastElement = state.peekBackwardOnGenome();  // last cigar element
+                    final boolean isSingleElementCigar = nextElement == lastElement;
+                    final CigarOperator nextOp = nextElement.getOperator();         // next cigar operator
+                    final CigarOperator lastOp = lastElement.getOperator();         // last cigar operator
+                    int readOffset = state.getReadOffset();                         // the base offset on this read
+
+                    final boolean isBeforeDeletion  = nextOp == CigarOperator.DELETION;
+                    final boolean isAfterDeletion   = lastOp == CigarOperator.DELETION;
+                    final boolean isBeforeInsertion = nextOp == CigarOperator.INSERTION;
+                    final boolean isAfterInsertion  = lastOp == CigarOperator.INSERTION && !isSingleElementCigar;
+                    final boolean isNextToSoftClip  = nextOp == CigarOperator.S || (state.getGenomeOffset() == 0 && read.getSoftStart() != read.getAlignmentStart());
+
+                    int nextElementLength = nextElement.getLength();
+
+                    if (op == CigarOperator.N)                                      // N's are never added to any pileup
+                        continue;
+
+                    if (op == CigarOperator.D) {
+                        // TODO -- LIBS is totally busted for deletions so that reads with Ds right before Is in their CIGAR are broken; must fix
+                        if (readInfo.includeReadsWithDeletionAtLoci()) {            // only add deletions to the pileup if we are authorized to do so
+                            pile.add(new PileupElement(read, readOffset, true, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, null, nextOp == CigarOperator.D ? nextElementLength : -1));
+                            size++;
+                            nDeletions++;
+                            if (read.getMappingQuality() == 0)
+                                nMQ0Reads++;
+                        }
+                    }
+                    else {
+                        if (!filterBaseInRead(read, location.getStart())) {
+                            String insertedBaseString = null;
+                            if (nextOp == CigarOperator.I) {
+                                final int insertionOffset = isSingleElementCigar ? 0 : 1;
+                                // TODO -- someone please implement a better fix for the single element insertion CIGAR!
+                                if (isSingleElementCigar)
+                                    readOffset -= (nextElement.getLength() - 1); // LIBS has passed over the insertion bases!
+                                insertedBaseString = new String(Arrays.copyOfRange(read.getReadBases(), readOffset + insertionOffset, readOffset + insertionOffset + nextElement.getLength()));
+                            }
+
+                            pile.add(new PileupElement(read, readOffset, false, isBeforeDeletion, isAfterDeletion, isBeforeInsertion, isAfterInsertion, isNextToSoftClip, insertedBaseString, nextElementLength));
+                            size++;
+                            if (read.getMappingQuality() == 0)
+                                nMQ0Reads++;
+                        }
+                    }
+                }
+
+                if (pile.size() != 0)                                             // if this pileup added at least one base, add it to the full pileup
+                    fullPileup.put(sample, new ReadBackedPileupImpl(location, pile, size, nDeletions, nMQ0Reads));
+            }
+
+            updateReadStates();                                                   // critical - must be called after we get the current state offsets and location
+            if (!fullPileup.isEmpty())                                            // if we got reads with non-D/N over the current position, we are done
+                nextAlignmentContext = new AlignmentContext(location, new ReadBackedPileupImpl(location, fullPileup), hasBeenSampled);
+        }
+    }
+
+    // fast testing of position
+    private boolean readIsPastCurrentPosition(SAMRecord read) {
+        if (readStates.isEmpty())
+            return false;
+        else {
+            SAMRecordState state = readStates.getFirst();
+            SAMRecord ourRead = state.getRead();
+            return read.getReferenceIndex() > ourRead.getReferenceIndex() || read.getAlignmentStart() > state.getGenomePosition();
+        }
+    }
+
+    /**
+     * Generic place to put per-base filters appropriate to LocusIteratorByState
+     *
+     * @param rec
+     * @param pos
+     * @return
+     */
+    private static boolean filterBaseInRead(GATKSAMRecord rec, long pos) {
+        return ReadUtils.isBaseInsideAdaptor(rec, pos);
+    }
+
+    private void updateReadStates() {
+        for (final String sample : samples) {
+            Iterator<SAMRecordState> it = readStates.iterator(sample);
+            while (it.hasNext()) {
+                SAMRecordState state = it.next();
+                CigarOperator op = state.stepForwardOnGenome();
+                if (op == null) {
+                    // we discard the read only when we are past its end AND indel at the end of the read (if any) was
+                    // already processed. Keeping the read state that returned null upon stepForwardOnGenome() is safe
+                    // as the next call to stepForwardOnGenome() will return null again AND will clear hadIndel() flag.
+                    it.remove();                                                // we've stepped off the end of the object
+                }
+            }
+        }
+    }
+
+    public void remove() {
+        throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
+    }
+
+    protected class ReadStateManager {
+        private final PeekableIterator<SAMRecord> iterator;
+        private final SamplePartitioner samplePartitioner;
+        private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
+        private int totalReadStates = 0;
+
+        public ReadStateManager(Iterator<SAMRecord> source) {
+            this.iterator = new PeekableIterator<SAMRecord>(source);
+
+            for (final String sample : samples) {
+                readStatesBySample.put(sample, new PerSampleReadStateManager());
+            }
+
+            samplePartitioner = new SamplePartitioner();
+        }
+
+        /**
+         * Returns a iterator over all the reads associated with the given sample.  Note that remove() is implemented
+         * for this iterator; if present, total read states will be decremented.
+         *
+         * @param sample The sample.
+         * @return Iterator over the reads associated with that sample.
+         */
+        public Iterator<SAMRecordState> iterator(final String sample) {
+            return new Iterator<SAMRecordState>() {
+                private Iterator<SAMRecordState> wrappedIterator = readStatesBySample.get(sample).iterator();
+
+                public boolean hasNext() {
+                    return wrappedIterator.hasNext();
+                }
+
+                public SAMRecordState next() {
+                    return wrappedIterator.next();
+                }
+
+                public void remove() {
+                    wrappedIterator.remove();
+                }
+            };
+        }
+
+        public boolean isEmpty() {
+            return totalReadStates == 0;
+        }
+
+        /**
+         * Retrieves the total number of reads in the manager across all samples.
+         *
+         * @return Total number of reads over all samples.
+         */
+        public int size() {
+            return totalReadStates;
+        }
+
+        /**
+         * Retrieves the total number of reads in the manager in the given sample.
+         *
+         * @param sample The sample.
+         * @return Total number of reads in the given sample.
+         */
+        public int size(final String sample) {
+            return readStatesBySample.get(sample).size();
+        }
+
+        public SAMRecordState getFirst() {
+            for (final String sample : samples) {
+                PerSampleReadStateManager reads = readStatesBySample.get(sample);
+                if (!reads.isEmpty())
+                    return reads.peek();
+            }
+            return null;
+        }
+
+        public boolean hasNext() {
+            return totalReadStates > 0 || iterator.hasNext();
+        }
+
+        public void collectPendingReads() {
+            if (!iterator.hasNext())
+                return;
+
+            if (readStates.size() == 0) {
+                int firstContigIndex = iterator.peek().getReferenceIndex();
+                int firstAlignmentStart = iterator.peek().getAlignmentStart();
+                while (iterator.hasNext() && iterator.peek().getReferenceIndex() == firstContigIndex && iterator.peek().getAlignmentStart() == firstAlignmentStart) {
+                    samplePartitioner.submitRead(iterator.next());
+                }
+            } else {
+                // Fast fail in the case that the read is past the current position.
+                if (readIsPastCurrentPosition(iterator.peek()))
+                    return;
+
+                while (iterator.hasNext() && !readIsPastCurrentPosition(iterator.peek())) {
+                    samplePartitioner.submitRead(iterator.next());
+                }
+            }
+
+            for (final String sample : samples) {
+                Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
+                PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
+                addReadsToSample(statesBySample, newReads);
+            }
+
+            samplePartitioner.reset();
+        }
+
+        /**
+         * Add reads with the given sample name to the given hanger entry.
+         *
+         * @param readStates The list of read states to add this collection of reads.
+         * @param reads      Reads to add.  Selected reads will be pulled from this source.
+         */
+        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
+            if (reads.isEmpty())
+                return;
+
+            Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
+
+            for (SAMRecord read : reads) {
+                SAMRecordState state = new SAMRecordState(read);
+                state.stepForwardOnGenome();
+                newReadStates.add(state);
+            }
+
+            readStates.addStatesAtNextAlignmentStart(newReadStates);
+        }
+
+        protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
+            private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
+            private int thisSampleReadStates = 0;
+            private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
+                      performLevelingDownsampling ?
+                      new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
+                      null;
+
+            public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
+                if ( states.isEmpty() ) {
+                    return;
+                }
+
+                readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
+                thisSampleReadStates += states.size();
+                totalReadStates += states.size();
+
+                if ( levelingDownsampler != null ) {
+                    levelingDownsampler.submit(readStatesByAlignmentStart);
+                    levelingDownsampler.signalEndOfInput();
+
+                    thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
+                    totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
+
+                    // use returned List directly rather than make a copy, for efficiency's sake
+                    readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
+                    levelingDownsampler.reset();
+                }
+            }
+
+            public boolean isEmpty() {
+                return readStatesByAlignmentStart.isEmpty();
+            }
+
+            public SAMRecordState peek() {
+                return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
+            }
+
+            public int size() {
+                return thisSampleReadStates;
+            }
+
+            public Iterator<SAMRecordState> iterator() {
+                return new Iterator<SAMRecordState>() {
+                    private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
+                    private LinkedList<SAMRecordState> currentPositionReadStates = null;
+                    private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
+
+                    public boolean hasNext() {
+                        return  alignmentStartIterator.hasNext() ||
+                                (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
+                    }
+
+                    public SAMRecordState next() {
+                        if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
+                            currentPositionReadStates = alignmentStartIterator.next();
+                            currentPositionReadStatesIterator = currentPositionReadStates.iterator();
+                        }
+
+                        return currentPositionReadStatesIterator.next();
+                    }
+
+                    public void remove() {
+                        currentPositionReadStatesIterator.remove();
+                        thisSampleReadStates--;
+                        totalReadStates--;
+
+                        if ( currentPositionReadStates.isEmpty() ) {
+                            alignmentStartIterator.remove();
+                        }
+                    }
+                };
+            }
+        }
+    }
+
+    /**
+     * Note: stores reads by sample ID string, not by sample object
+     */
+    private class SamplePartitioner {
+        private Map<String, Collection<SAMRecord>> readsBySample;
+        private long readsSeen = 0;
+
+        public SamplePartitioner() {
+            readsBySample = new HashMap<String, Collection<SAMRecord>>();
+
+            for ( String sample : samples ) {
+                readsBySample.put(sample, new ArrayList<SAMRecord>());
+            }
+        }
+
+        public void submitRead(SAMRecord read) {
+            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+            if (readsBySample.containsKey(sampleName))
+                readsBySample.get(sampleName).add(read);
+            readsSeen++;
+        }
+
+        public long getNumReadsSeen() {
+            return readsSeen;
+        }
+
+        public Collection<SAMRecord> getReadsForSample(String sampleName) {
+            if ( ! readsBySample.containsKey(sampleName) )
+                throw new NoSuchElementException("Sample name not found");
+            return readsBySample.get(sampleName);
+        }
+
+        public void reset() {
+            for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
+                perSampleReads.clear();
+            readsSeen = 0;
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/VerifyingSamIterator.java
@ -10,13 +10,11 @@ import java.util.Iterator;
 * Verifies that the incoming stream of reads is correctly sorted
 */
 public class VerifyingSamIterator implements StingSAMIterator {
-    private GenomeLocParser genomeLocParser;
    StingSAMIterator it;
    SAMRecord last = null;
    boolean checkOrderP = true;

-    public VerifyingSamIterator(GenomeLocParser genomeLocParser,StingSAMIterator it) {
-        this.genomeLocParser = genomeLocParser;
+    public VerifyingSamIterator(StingSAMIterator it) {
        this.it = it;
    }

--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@ -218,7 +218,7 @@ public class GATKRunReport {
        // if there was an exception, capture it
        this.mException = e == null ? null : new ExceptionToXML(e);

-        numThreads = engine.getArguments().numberOfThreads;
+        numThreads = engine.getTotalNumberOfThreads();
        percentTimeRunning = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.USER_CPU);
        percentTimeBlocking = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.BLOCKING);
        percentTimeWaiting = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING);
--- a/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
@ -24,7 +24,7 @@

 package org.broadinstitute.sting.gatk.resourcemanagement;

-import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;

 /**
 * Models how threads are distributed between various components of the GATK.
@ -33,7 +33,12 @@ public class ThreadAllocation {
    /**
     * The number of CPU threads to be used by the GATK.
     */
-    private final int numCPUThreads;
+    private final int numDataThreads;
+
+    /**
+     * The number of CPU threads per data thread for GATK processing
+     */
+    private final int numCPUThreadsPerDataThread;

    /**
     * Number of threads to devote exclusively to IO.  Default is 0.
@ -45,8 +50,12 @@ public class ThreadAllocation {
     */
    private final boolean monitorEfficiency;

-    public int getNumCPUThreads() {
-        return numCPUThreads;
+    public int getNumDataThreads() {
+        return numDataThreads;
+    }
+
+    public int getNumCPUThreadsPerDataThread() {
+        return numCPUThreadsPerDataThread;
    }

    public int getNumIOThreads() {
@ -57,47 +66,50 @@ public class ThreadAllocation {
        return monitorEfficiency;
    }

+    /**
+     * Are we running in parallel mode?
+     *
+     * @return true if any parallel processing is enabled
+     */
+    public boolean isRunningInParallelMode() {
+        return getTotalNumThreads() > 1;
+    }
+
+    /**
+     * What is the total number of threads in use by the GATK?
+     *
+     * @return the sum of all thread allocations in this object
+     */
+    public int getTotalNumThreads() {
+        return getNumDataThreads() * getNumCPUThreadsPerDataThread() + getNumIOThreads();
+    }
+
    /**
     * Construct the default thread allocation.
     */
    public ThreadAllocation() {
-        this(1, null, null, false);
+        this(1, 1, 0, false);
    }

    /**
     * Set up the thread allocation.  Default allocation is 1 CPU thread, 0 IO threads.
     * (0 IO threads means that no threads are devoted exclusively to IO; they're inline on the CPU thread).
-     * @param totalThreads Complete number of threads to allocate.
-     * @param numCPUThreads Total number of threads allocated to the traversal.
+     * @param numDataThreads Total number of threads allocated to the traversal.
+     * @param numCPUThreadsPerDataThread The number of CPU threads per data thread to allocate
     * @param numIOThreads Total number of threads allocated exclusively to IO.
+     * @param monitorEfficiency should we monitor threading efficiency in the GATK?
     */
-    public ThreadAllocation(final int totalThreads, final Integer numCPUThreads, final Integer numIOThreads, final boolean monitorEfficiency) {
-        // If no allocation information is present, allocate all threads to CPU
-        if(numCPUThreads == null && numIOThreads == null) {
-            this.numCPUThreads = totalThreads;
-            this.numIOThreads = 0;
-        }
-        // If only CPU threads are specified, allocate remainder to IO (minimum 0 dedicated IO threads).
-        else if(numIOThreads == null) {
-            if(numCPUThreads > totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of cpu threads (%d) is higher than the total threads",totalThreads,numCPUThreads));
-            this.numCPUThreads = numCPUThreads;
-            this.numIOThreads = totalThreads - numCPUThreads;
-        }
-        // If only IO threads are specified, allocate remainder to CPU (minimum 1 dedicated CPU thread).
-        else if(numCPUThreads == null) {
-            if(numIOThreads > totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of io threads (%d) is higher than the total threads",totalThreads,numIOThreads));
-            this.numCPUThreads = Math.max(1,totalThreads-numIOThreads);
-            this.numIOThreads = numIOThreads;
-        }
-        else {
-            if(numCPUThreads + numIOThreads != totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of cpu threads (%d) + the count of io threads (%d) does not match",totalThreads,numCPUThreads,numIOThreads));
-            this.numCPUThreads = numCPUThreads;
-            this.numIOThreads = numIOThreads;
-        }
+    public ThreadAllocation(final int numDataThreads,
+                            final int numCPUThreadsPerDataThread,
+                            final int numIOThreads,
+                            final boolean monitorEfficiency) {
+        if ( numDataThreads < 1 ) throw new ReviewedStingException("numDataThreads cannot be less than 1, but saw " + numDataThreads);
+        if ( numCPUThreadsPerDataThread < 1 ) throw new ReviewedStingException("numCPUThreadsPerDataThread cannot be less than 1, but saw " + numCPUThreadsPerDataThread);
+        if ( numIOThreads < 0 ) throw new ReviewedStingException("numIOThreads cannot be less than 0, but saw " + numIOThreads);

+        this.numDataThreads = numDataThreads;
+        this.numCPUThreadsPerDataThread = numCPUThreadsPerDataThread;
+        this.numIOThreads = numIOThreads;
        this.monitorEfficiency = monitorEfficiency;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@ -44,24 +44,12 @@ import java.util.List;
 import java.util.Map;

 public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,ProviderType extends ShardDataProvider> {
+    /** our log, which we want to capture anything from this class */
+    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
+
    // Time in milliseconds since we initialized this engine
    private static final int HISTORY_WINDOW_SIZE = 50;

-    private static class ProcessingHistory {
-        double elapsedSeconds;
-        long unitsProcessed;
-        long bpProcessed;
-        GenomeLoc loc;
-
-        public ProcessingHistory(double elapsedSeconds, GenomeLoc loc, long unitsProcessed, long bpProcessed) {
-            this.elapsedSeconds = elapsedSeconds;
-            this.loc = loc;
-            this.unitsProcessed = unitsProcessed;
-            this.bpProcessed = bpProcessed;
-        }
-
-    }
-
    /** lock object to sure updates to history are consistent across threads */
    private static final Object lock = new Object();
    LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@ -70,13 +58,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
    private SimpleTimer timer = null;

    // How long can we go without printing some progress info?
-    private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
-    private int printProgressCheckCounter = 0;
    private long lastProgressPrintTime = -1;                       // When was the last time we printed progress log?
-    private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
-    private long PROGRESS_PRINT_FREQUENCY = 10 * 1000;             // in milliseconds
-    private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
-    private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
+
+    private final static long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
+    private final static double TWO_HOURS_IN_SECONDS                 = 2.0 * 60.0 * 60.0;
+    private final static double TWELVE_HOURS_IN_SECONDS              = 12.0 * 60.0 * 60.0;
+    private long progressPrintFrequency                              = 10 * 1000; // in milliseconds
    private boolean progressMeterInitialized = false;

    // for performance log
@ -85,15 +72,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
    private File performanceLogFile;
    private PrintStream performanceLog = null;
    private long lastPerformanceLogPrintTime = -1;                   // When was the last time we printed to the performance log?
-    private final long PERFORMANCE_LOG_PRINT_FREQUENCY = PROGRESS_PRINT_FREQUENCY;  // in milliseconds
+    private final long PERFORMANCE_LOG_PRINT_FREQUENCY = progressPrintFrequency;  // in milliseconds

    /** Size, in bp, of the area we are processing.  Updated once in the system in initial for performance reasons */
    long targetSize = -1;
    GenomeLocSortedSet targetIntervals = null;

-    /** our log, which we want to capture anything from this class */
-    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
-
    protected GenomeAnalysisEngine engine;

    // ----------------------------------------------------------------------------------------------------
@ -186,15 +170,35 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
        return elapsed > printFreq && elapsed > MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS;
    }

+    /**
+     * Update the cumulative traversal metrics according to the data in this shard
+     *
+     * @param shard a non-null shard
+     */
+    public void updateCumulativeMetrics(final Shard shard) {
+        updateCumulativeMetrics(shard.getReadMetrics());
+    }
+
+    /**
+     * Update the cumulative traversal metrics according to the data in this shard
+     *
+     * @param singleTraverseMetrics read metrics object containing the information about a single shard's worth
+     *                              of data processing
+     */
+    public void updateCumulativeMetrics(final ReadMetrics singleTraverseMetrics) {
+        engine.getCumulativeMetrics().incrementMetrics(singleTraverseMetrics);
+    }
+
    /**
     * Forward request to printProgress
     *
-     * @param shard the given shard currently being processed.
+     * Assumes that one cycle has been completed
+     *
     * @param loc  the location
     */
-    public void printProgress(Shard shard, GenomeLoc loc) {
+    public void printProgress(final GenomeLoc loc) {
        // A bypass is inserted here for unit testing.
-        printProgress(loc,shard.getReadMetrics(),false);
+        printProgress(loc, false);
    }

    /**
@ -202,15 +206,10 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     * every M seconds, for N and M set in global variables.
     *
     * @param loc       Current location, can be null if you are at the end of the traversal
-     * @param metrics   Data processed since the last cumulative
     * @param mustPrint If true, will print out info, regardless of nRecords or time interval
     */
-    private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) {
-        if ( mustPrint || printProgressCheckCounter++ % PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES != 0 )
-            // don't do any work more often than PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES
-            return;
-
-        if(!progressMeterInitialized && mustPrint == false ) {
+    private synchronized void printProgress(final GenomeLoc loc, boolean mustPrint) {
+        if( ! progressMeterInitialized ) {
            logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]");
            logger.info(String.format("%15s processed.%s  runtime per.1M.%s completed total.runtime remaining",
                    "Location", getTraversalType(), getTraversalType()));
@ -218,40 +217,34 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
        }

        final long curTime = timer.currentTime();
-        boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY);
+        boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, progressPrintFrequency);
        boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);

        if ( printProgress || printLog ) {
-            // getting and appending metrics data actually turns out to be quite a heavyweight
-            // operation.  Postpone it until after determining whether to print the log message.
-            ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics() : new ReadMetrics();
-            if(metrics != null)
-                cumulativeMetrics.incrementMetrics(metrics);
-
-            final long nRecords = cumulativeMetrics.getNumIterations();
-
-            ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
+            final ProcessingHistory last = updateHistory(loc, engine.getCumulativeMetrics());

            final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
-            final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
-            final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
-            final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
+            final AutoFormattingTime bpRate = new AutoFormattingTime(last.secondsPerMillionBP());
+            final AutoFormattingTime unitRate = new AutoFormattingTime(last.secondsPerMillionElements());
+            final double fractionGenomeTargetCompleted = last.calculateFractionGenomeTargetCompleted(targetSize);
            final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
            final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
+            final long nRecords = engine.getCumulativeMetrics().getNumIterations();

            if ( printProgress ) {
                lastProgressPrintTime = curTime;

                // dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
                if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
-                    PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
+                    progressPrintFrequency = 60 * 1000; // in milliseconds
                else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
-                    PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
+                    progressPrintFrequency = 30 * 1000; // in milliseconds
                else
-                    PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
+                    progressPrintFrequency = 10 * 1000; // in milliseconds

-                logger.info(String.format("%15s        %5.2e %s     %s     %4.1f%%      %s  %s",
-                        loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate,
+                final String posName = loc == null ? (mustPrint ? "done" : "unmapped reads") : String.format("%s:%d", loc.getContig(), loc.getStart());
+                logger.info(String.format("%15s        %5.2e %s     %s    %5.1f%%      %s  %s",
+                        posName, nRecords*1.0, elapsed, unitRate,
                        100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));

            }
@ -277,7 +270,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     * @param metrics information about what's been processed already
     * @return
     */
-    private final ProcessingHistory updateHistory(GenomeLoc loc, ReadMetrics metrics) {
+    private ProcessingHistory updateHistory(GenomeLoc loc, ReadMetrics metrics) {
        synchronized (lock) {
            if ( history.size() > HISTORY_WINDOW_SIZE )
                history.pop();
@ -290,26 +283,11 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
        }
    }

-    /** How long in seconds to process 1M traversal units? */
-    private final double secondsPerMillionElements(ProcessingHistory last) {
-        return (last.elapsedSeconds * 1000000.0) / Math.max(last.unitsProcessed, 1);
-    }
-
-    /** How long in seconds to process 1M bp on the genome? */
-    private final double secondsPerMillionBP(ProcessingHistory last) {
-        return (last.elapsedSeconds * 1000000.0) / Math.max(last.bpProcessed, 1);
-    }
-
-    /** What fractoin of the target intervals have we covered? */
-    private final double calculateFractionGenomeTargetCompleted(ProcessingHistory last) {
-        return (1.0*last.bpProcessed) / targetSize;
-    }
-
    /**
     * Called after a traversal to print out information about the traversal process
     */
    public void printOnTraversalDone() {
-        printProgress(null, null, true);
+        printProgress(null, true);

        final double elapsed = timer == null ? 0 : timer.getElapsedTime();

@ -370,7 +348,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     * @return Frequency, in seconds, of performance log writes.
     */
    public long getPerformanceProgressPrintFrequencySeconds() {
-        return PROGRESS_PRINT_FREQUENCY;
+        return progressPrintFrequency;
    }

    /**
@ -378,6 +356,35 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     * @param seconds number of seconds between messages indicating performance frequency.
     */
    public void setPerformanceProgressPrintFrequencySeconds(long seconds) {
-        PROGRESS_PRINT_FREQUENCY = seconds;
+        progressPrintFrequency = seconds;
+    }
+
+    private static class ProcessingHistory {
+        double elapsedSeconds;
+        long unitsProcessed;
+        long bpProcessed;
+        GenomeLoc loc;
+
+        public ProcessingHistory(double elapsedSeconds, GenomeLoc loc, long unitsProcessed, long bpProcessed) {
+            this.elapsedSeconds = elapsedSeconds;
+            this.loc = loc;
+            this.unitsProcessed = unitsProcessed;
+            this.bpProcessed = bpProcessed;
+        }
+
+        /** How long in seconds to process 1M traversal units? */
+        private double secondsPerMillionElements() {
+            return (elapsedSeconds * 1000000.0) / Math.max(unitsProcessed, 1);
+        }
+
+        /** How long in seconds to process 1M bp on the genome? */
+        private double secondsPerMillionBP() {
+            return (elapsedSeconds * 1000000.0) / Math.max(bpProcessed, 1);
+        }
+
+        /** What fractoin of the target intervals have we covered? */
+        private double calculateFractionGenomeTargetCompleted(final long targetSize) {
+            return (1.0*bpProcessed) / targetSize;
+        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@ -104,7 +104,8 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio

                prevLoc = location;

-                printProgress(dataProvider.getShard(), locus.getLocation());
+                updateCumulativeMetrics(dataProvider.getShard());
+                printProgress(locus.getLocation());
            }

            // Take the individual isActive calls and integrate them into contiguous active regions and
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
@ -196,7 +196,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
                sum = walker.reduce(x, sum);
            }

-            printProgress(dataProvider.getShard(),site);
+            updateCumulativeMetrics(dataProvider.getShard());
+            printProgress(site);
            done = walker.isDone();
        }

--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.traversals;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.WalkerManager;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.providers.*;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.Walker;
@ -15,28 +13,42 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 /**
 * A simple solution to iterating over all reference positions over a series of genomic locations.
 */
-public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
+public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
    /**
     * our log, which we want to capture anything from this class
     */
    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);

    @Override
-    protected String getTraversalType() {
+    protected final String getTraversalType() {
        return "sites";
    }

+    protected static class TraverseResults<T> {
+        final int numIterations;
+        final T reduceResult;
+
+        public TraverseResults(int numIterations, T reduceResult) {
+            this.numIterations = numIterations;
+            this.reduceResult = reduceResult;
+        }
+    }
+
+    protected abstract TraverseResults<T> traverse( final LocusWalker<M,T> walker,
+                                                    final LocusView locusView,
+                                                    final LocusReferenceView referenceView,
+                                                    final ReferenceOrderedView referenceOrderedDataView,
+                                                    final T sum);
+
    @Override
    public T traverse( LocusWalker<M,T> walker,
                       LocusShardDataProvider dataProvider,
                       T sum) {
-        logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
+        logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider));

-        LocusView locusView = getLocusView( walker, dataProvider );
-        boolean done = false;
+        final LocusView locusView = getLocusView( walker, dataProvider );

        if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
-
            //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
            ReferenceOrderedView referenceOrderedDataView = null;
            if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
@ -44,43 +56,24 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
            else
                referenceOrderedDataView = (RodLocusView)locusView;

-            LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
+            final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );

-            // We keep processing while the next reference location is within the interval
-            while( locusView.hasNext() && ! done ) {
-                AlignmentContext locus = locusView.next();
-                GenomeLoc location = locus.getLocation();
-
-                dataProvider.getShard().getReadMetrics().incrementNumIterations();
-
-                // create reference context. Note that if we have a pileup of "extended events", the context will
-                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
-                ReferenceContext refContext = referenceView.getReferenceContext(location);
-
-                // Iterate forward to get all reference ordered data covering this location
-                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
-
-                final boolean keepMeP = walker.filter(tracker, refContext, locus);
-                if (keepMeP) {
-                    M x = walker.map(tracker, refContext, locus);
-                    sum = walker.reduce(x, sum);
-                    done = walker.isDone();
-                }
-
-                printProgress(dataProvider.getShard(),locus.getLocation());
-            }
+            final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
+            sum = result.reduceResult;
+            dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
+            updateCumulativeMetrics(dataProvider.getShard());
        }

        // We have a final map call to execute here to clean up the skipped based from the
        // last position in the ROD to that in the interval
        if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
            // only do this if the walker isn't done!
-            RodLocusView rodLocusView = (RodLocusView)locusView;
-            long nSkipped = rodLocusView.getLastSkippedBases();
+            final RodLocusView rodLocusView = (RodLocusView)locusView;
+            final long nSkipped = rodLocusView.getLastSkippedBases();
            if ( nSkipped > 0 ) {
-                GenomeLoc site = rodLocusView.getLocOneBeyondShard();
-                AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
-                M x = walker.map(null, null, ac);
+                final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
+                final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
+                final M x = walker.map(null, null, ac);
                sum = walker.reduce(x, sum);
            }
        }
@ -90,14 +83,14 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu

    /**
     * Gets the best view of loci for this walker given the available data.  The view will function as a 'trigger track'
-     * of sorts, providing a consistent interface so that TraverseLoci doesn't need to be reimplemented for any new datatype
+     * of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype
     * that comes along.
     * @param walker walker to interrogate.
     * @param dataProvider Data which which to drive the locus view.
     * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
     */
    private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
-        DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
+        final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
        if( dataSource == DataSource.READS )
            return new CoveredLocusView(dataProvider);
        else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
@ -0,0 +1,47 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
+import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+/**
+ * A simple solution to iterating over all reference positions over a series of genomic locations.
+ */
+public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
+
+    @Override
+    protected TraverseResults<T> traverse(LocusWalker<M, T> walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) {
+        // We keep processing while the next reference location is within the interval
+        boolean done = false;
+        int numIterations = 0;
+
+        while( locusView.hasNext() && ! done ) {
+            numIterations++;
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();
+
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+            final boolean keepMeP = walker.filter(tracker, refContext, locus);
+            if (keepMeP) {
+                final M x = walker.map(tracker, refContext, locus);
+                sum = walker.reduce(x, sum);
+                done = walker.isDone();
+            }
+
+            printProgress(locus.getLocation());
+        }
+
+        return new TraverseResults<T>(numIterations, sum);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
@ -0,0 +1,205 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
+import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NSProgressFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
+
+import java.util.Iterator;
+
+/**
+ * A simple solution to iterating over all reference positions over a series of genomic locations.
+ */
+public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
+    /** our log, which we want to capture anything from this class */
+    private static final boolean DEBUG = false;
+    private static final int BUFFER_SIZE = 1000;
+
+    final NanoScheduler<MapData, MapResult, T> nanoScheduler;
+
+    public TraverseLociNano(int nThreads) {
+        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(BUFFER_SIZE, nThreads);
+        nanoScheduler.setProgressFunction(new TraverseLociProgress());
+    }
+
+    @Override
+    protected TraverseResults<T> traverse(final LocusWalker<M, T> walker,
+                                          final LocusView locusView,
+                                          final LocusReferenceView referenceView,
+                                          final ReferenceOrderedView referenceOrderedDataView,
+                                          final T sum) {
+        nanoScheduler.setDebug(DEBUG);
+        final TraverseLociMap myMap = new TraverseLociMap(walker);
+        final TraverseLociReduce myReduce = new TraverseLociReduce(walker);
+
+        final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView);
+        final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce);
+
+        return new TraverseResults<T>(inputIterator.numIterations, result);
+    }
+
+    /**
+     * Create iterator that provides inputs for all map calls into MapData, to be provided
+     * to NanoScheduler for Map/Reduce
+     */
+    private class MapDataIterator implements Iterator<MapData> {
+        final LocusView locusView;
+        final LocusReferenceView referenceView;
+        final ReferenceOrderedView referenceOrderedDataView;
+        int numIterations = 0;
+
+        private MapDataIterator(LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView) {
+            this.locusView = locusView;
+            this.referenceView = referenceView;
+            this.referenceOrderedDataView = referenceOrderedDataView;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return locusView.hasNext();
+        }
+
+        @Override
+        public MapData next() {
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();
+
+            //logger.info("Pulling data from MapDataIterator at " + location);
+
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);
+
+            numIterations++;
+            return new MapData(locus, refContext,  tracker);
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("Cannot remove elements from MapDataIterator");
+        }
+    }
+
+    @Override
+    public void printOnTraversalDone() {
+        nanoScheduler.shutdown();
+        super.printOnTraversalDone();
+    }
+
+    /**
+     * The input data needed for each map call.  The read, the reference, and the RODs
+     */
+    private class MapData {
+        final AlignmentContext alignmentContext;
+        final ReferenceContext refContext;
+        final RefMetaDataTracker tracker;
+
+        private MapData(final AlignmentContext alignmentContext, ReferenceContext refContext, RefMetaDataTracker tracker) {
+            this.alignmentContext = alignmentContext;
+            this.refContext = refContext;
+            this.tracker = tracker;
+        }
+
+        @Override
+        public String toString() {
+            return "MapData " + alignmentContext.getLocation();
+        }
+    }
+
+    /**
+     * Contains the results of a map call, indicating whether the call was good, filtered, or done
+     */
+    private class MapResult {
+        final M value;
+        final boolean reduceMe;
+
+        /**
+         * Create a MapResult with value that should be reduced
+         *
+         * @param value the value to reduce
+         */
+        private MapResult(final M value) {
+            this.value = value;
+            this.reduceMe = true;
+        }
+
+        /**
+         * Create a MapResult that shouldn't be reduced
+         */
+        private MapResult() {
+            this.value = null;
+            this.reduceMe = false;
+        }
+    }
+
+    /**
+     * A static object that tells reduce that the result of map should be skipped (filtered or done)
+     */
+    private final MapResult SKIP_REDUCE = new MapResult();
+
+    /**
+     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Applies walker.map to MapData, returning a MapResult object containing the result
+     */
+    private class TraverseLociMap implements NSMapFunction<MapData, MapResult> {
+        final LocusWalker<M,T> walker;
+
+        private TraverseLociMap(LocusWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public MapResult apply(final MapData data) {
+            if ( ! walker.isDone() ) {
+                final boolean keepMeP = walker.filter(data.tracker, data.refContext, data.alignmentContext);
+                if (keepMeP) {
+                    final M x = walker.map(data.tracker, data.refContext, data.alignmentContext);
+                    return new MapResult(x);
+                }
+            }
+            return SKIP_REDUCE;
+        }
+    }
+
+    /**
+     * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
+     */
+    private class TraverseLociReduce implements NSReduceFunction<MapResult, T> {
+        final LocusWalker<M,T> walker;
+
+        private TraverseLociReduce(LocusWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public T apply(MapResult one, T sum) {
+            if ( one.reduceMe )
+                // only run reduce on values that aren't DONE or FAILED
+                return walker.reduce(one.value, sum);
+            else
+                return sum;
+        }
+    }
+
+    private class TraverseLociProgress implements NSProgressFunction<MapData> {
+        @Override
+        public void progress(MapData lastProcessedMap) {
+            if (lastProcessedMap.alignmentContext != null)
+                printProgress(lastProcessedMap.alignmentContext.getLocation());
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
@ -65,7 +65,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
                pairs.clear();
                pairs.add(read);

-                printProgress(dataProvider.getShard(),null);
+                updateCumulativeMetrics(dataProvider.getShard());
+                printProgress(null);
            }

            done = walker.isDone();
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
@ -99,8 +99,11 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
                sum = walker.reduce(x, sum);
            }

-            GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
-            printProgress(dataProvider.getShard(),locus);
+            final GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
+
+            updateCumulativeMetrics(dataProvider.getShard());
+            printProgress(locus);
+
            done = walker.isDone();
        }
        return sum;
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
@ -34,34 +34,34 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
 import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
-import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.nanoScheduler.NSMapFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NSReduceFunction;
 import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
-import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

-import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;

 /**
- * @author aaron
+ * A nano-scheduling version of TraverseReads.
+ *
+ * Implements the traversal of a walker that accepts individual reads, the reference, and
+ * RODs per map call.  Directly supports shared memory parallelism via NanoScheduler
+ *
+ * @author depristo
 * @version 1.0
- * @date Apr 24, 2009
- * <p/>
- * Class TraverseReads
- * <p/>
- * This class handles traversing by reads in the new shardable style
+ * @date 9/2/2012
 */
 public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
    /** our log, which we want to capture anything from this class */
    protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
    private static final boolean DEBUG = false;
-    private static final int MIN_GROUP_SIZE = 100;
-    final NanoScheduler<MapData, M, T> nanoScheduler;
+    final NanoScheduler<MapData, MapResult, T> nanoScheduler;

    public TraverseReadsNano(int nThreads) {
        final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
-        final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
-        nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
+        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(bufferSize, nThreads);
    }

    @Override
@ -89,19 +89,32 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
        final TraverseReadsMap myMap = new TraverseReadsMap(walker);
        final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);

-        T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
-        // TODO -- how do we print progress?
-        //printProgress(dataProvider.getShard(), ???);
+        final List<MapData> aggregatedInputs = aggregateMapData(dataProvider);
+        final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce);
+
+        final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
+        final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
+
+        updateCumulativeMetrics(dataProvider.getShard());
+        printProgress(locus);

        return result;
    }

+    /**
+     * Aggregate all of the inputs for all map calls into MapData, to be provided
+     * to NanoScheduler for Map/Reduce
+     *
+     * @param dataProvider the source of our data
+     * @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
+     *          should execute
+     */
    private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
        final ReadView reads = new ReadView(dataProvider);
        final ReadReferenceView reference = new ReadReferenceView(dataProvider);
        final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);

-        final List<MapData> mapData = new ArrayList<MapData>();  // TODO -- need size of reads
+        final List<MapData> mapData = new LinkedList<MapData>();
        for ( final SAMRecord read : reads ) {
            final ReferenceContext refContext = ! read.getReadUnmappedFlag()
                    ? reference.getReferenceContext(read)
@ -127,19 +140,9 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
        super.printOnTraversalDone();
    }

-    private class TraverseReadsReduce implements ReduceFunction<M, T> {
-        final ReadWalker<M,T> walker;
-
-        private TraverseReadsReduce(ReadWalker<M, T> walker) {
-            this.walker = walker;
-        }
-
-        @Override
-        public T apply(M one, T sum) {
-            return walker.reduce(one, sum);
-        }
-    }
-
+    /**
+     * The input data needed for each map call.  The read, the reference, and the RODs
+     */
    private class MapData {
        final GATKSAMRecord read;
        final ReferenceContext refContext;
@ -152,7 +155,43 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
        }
    }

-    private class TraverseReadsMap implements MapFunction<MapData, M> {
+    /**
+     * Contains the results of a map call, indicating whether the call was good, filtered, or done
+     */
+    private class MapResult {
+        final M value;
+        final boolean reduceMe;
+
+        /**
+         * Create a MapResult with value that should be reduced
+         *
+         * @param value the value to reduce
+         */
+        private MapResult(final M value) {
+            this.value = value;
+            this.reduceMe = true;
+        }
+
+        /**
+         * Create a MapResult that shouldn't be reduced
+         */
+        private MapResult() {
+            this.value = null;
+            this.reduceMe = false;
+        }
+    }
+
+    /**
+     * A static object that tells reduce that the result of map should be skipped (filtered or done)
+     */
+    private final MapResult SKIP_REDUCE = new MapResult();
+
+    /**
+     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Applies walker.map to MapData, returning a MapResult object containing the result
+     */
+    private class TraverseReadsMap implements NSMapFunction<MapData, MapResult> {
        final ReadWalker<M,T> walker;

        private TraverseReadsMap(ReadWalker<M, T> walker) {
@ -160,15 +199,36 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
        }

        @Override
-        public M apply(final MapData data) {
+        public MapResult apply(final MapData data) {
            if ( ! walker.isDone() ) {
                final boolean keepMeP = walker.filter(data.refContext, data.read);
-                if (keepMeP) {
-                    return walker.map(data.refContext, data.read, data.tracker);
-                }
+                if (keepMeP)
+                    return new MapResult(walker.map(data.refContext, data.read, data.tracker));
            }

-            return null; // TODO -- what should we return in the case where the walker is done or the read is filtered?
+            return SKIP_REDUCE;
+        }
+    }
+
+    /**
+     * NSReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
+     */
+    private class TraverseReadsReduce implements NSReduceFunction<MapResult, T> {
+        final ReadWalker<M,T> walker;
+
+        private TraverseReadsReduce(ReadWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public T apply(MapResult one, T sum) {
+            if ( one.reduceMe )
+                // only run reduce on values that aren't DONE or FAILED
+                return walker.reduce(one.value, sum);
+            else
+                return sum;
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Downsample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Downsample.java
@ -1,6 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers;

-import org.broadinstitute.sting.gatk.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;

 import java.lang.annotation.*;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java
@ -45,7 +45,7 @@ import java.text.NumberFormat;
 */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS})
-public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
+public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements NanoSchedulable {
    @Output
    PrintStream out;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ThreadSafeMapReduce.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ThreadSafeMapReduce.java
@ -27,5 +27,5 @@ package org.broadinstitute.sting.gatk.walkers;
 * declare that their map function is thread-safe and so multiple
 * map calls can be run in parallel in the same JVM instance.
 */
-public interface ThreadSafeMapReduce {
+public interface NanoSchedulable {
 }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
@ -45,25 +45,14 @@ import java.util.Collections;
 import java.util.List;

 /**
- * Prints the alignment in the pileup format. In the pileup format, each line represents a genomic position,
- * consisting of chromosome name, coordinate, reference base, read bases, read qualities and alignment mapping
- * qualities. Information on match, mismatch, indel, strand, mapping quality and start and end of a read are all
- * encoded at the read base column. At this column, a dot stands for a match to the reference base on the forward strand,
- * a comma for a match on the reverse strand, 'ACGTN' for a mismatch on the forward strand and 'acgtn' for a mismatch on the
- * reverse strand.
- *
- * A pattern '\+[0-9]+[ACGTNacgtn]+' indicates there is an insertion between this reference position and the next
- * reference position. The length of the insertion is given by the integer in the pattern, followed by the inserted sequence.
- * Similarly, a pattern '-[0-9]+[ACGTNacgtn]+' represents a deletion from the reference.
- * Also at the read base column, a symbol '^' marks the start of a read segment which is a contiguous subsequence on the read
- * separated by 'N/S/H' CIGAR operations. The ASCII of the character following '^' minus 33 gives the mapping quality.
- * A symbol '$' marks the end of a read segment.
+ * Prints the alignment in something similar to the samtools pileup format.  Each line represents a genomic position,
+ * consisting of chromosome name, coordinate, reference base, read bases, and read qualities.
 *
 * Associated command:
 * samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
 */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
+public class Pileup extends LocusWalker<String, Integer> implements TreeReducible<Integer>, NanoSchedulable {

    private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names

@ -81,27 +70,32 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
    @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
    public List<RodBinding<Feature>> rods = Collections.emptyList();

-    public void initialize() {
-    }
-
-    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-
-        String rods = getReferenceOrderedData( tracker );
+    @Override
+    public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        final String rods = getReferenceOrderedData( tracker );

        ReadBackedPileup basePileup = context.getBasePileup();
-        out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
-        if ( SHOW_VERBOSE )
-            out.printf(" %s", createVerboseOutput(basePileup));
-        out.println();

-        return 1;
+        final StringBuilder s = new StringBuilder();
+        s.append(String.format("%s %s", basePileup.getPileupString((char)ref.getBase()), rods));
+        if ( SHOW_VERBOSE )
+            s.append(" ").append(createVerboseOutput(basePileup));
+        s.append("\n");
+
+        return s.toString();
    }

    // Given result of map function
+    @Override
    public Integer reduceInit() { return 0; }
-    public Integer reduce(Integer value, Integer sum) {
-        return treeReduce(sum,value);
+
+    @Override
+    public Integer reduce(String value, Integer sum) {
+        out.print(value);
+        return sum + 1;
    }
+
+    @Override
    public Integer treeReduce(Integer lhs, Integer rhs) {
        return lhs + rhs;
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
@ -93,7 +93,7 @@ import java.util.*;
@ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
@BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
@Requires({DataSource.READS, DataSource.REFERENCE})
-public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
+public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements NanoSchedulable {

    @Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
    SAMFileWriter out;
@ -228,7 +228,6 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
        GATKSAMRecord workingRead = readIn;

        for ( final ReadTransformer transformer : readTransformers ) {
-            if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName());
            workingRead = transformer.apply(workingRead);
        }

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java
@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
 * shards of the data can reduce with each other, and the composite result
 * can be reduced with other composite results.
 */
-public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
+public interface TreeReducible<ReduceType> {
    /**
     * A composite, 'reduce of reduces' function.
     * @param lhs 'left-most' portion of data in the composite reduce.
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/AlleleBalanceBySample.java
@ -33,6 +33,9 @@ public class AlleleBalanceBySample extends GenotypeAnnotation implements Experim
                         final Genotype g,
                         final GenotypeBuilder gb,
                         final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
+        if ( stratifiedContext == null )
+            return;
+
        Double ratio = annotateSNP(stratifiedContext, vc, g);
        if (ratio == null)
            return;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/DepthPerAlleleBySample.java
@ -54,7 +54,7 @@ public class DepthPerAlleleBySample extends GenotypeAnnotation implements Standa
                         final Genotype g,
                         final GenotypeBuilder gb,
                         final PerReadAlleleLikelihoodMap alleleLikelihoodMap) {
-        if ( g == null || !g.isCalled() )
+        if ( g == null || !g.isCalled() || ( stratifiedContext == null && alleleLikelihoodMap == null) )
            return;

        if (alleleLikelihoodMap != null && !alleleLikelihoodMap.isEmpty())
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityZeroBySample.java
@ -55,7 +55,7 @@ public class MappingQualityZeroBySample extends GenotypeAnnotation {
                         final Genotype g,
                         final GenotypeBuilder gb,
                         final PerReadAlleleLikelihoodMap alleleLikelihoodMap){
-        if ( g == null || !g.isCalled() )
+        if ( g == null || !g.isCalled() || stratifiedContext == null )
            return;

        int mq0 = 0;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@ -300,16 +300,12 @@ public class VariantAnnotatorEngine {
            if (stratifiedPerReadAlleleLikelihoodMap != null)
                perReadAlleleLikelihoodMap = stratifiedPerReadAlleleLikelihoodMap.get(genotype.getSampleName());

-            if ( context == null && perReadAlleleLikelihoodMap == null) {
-                // no likelihoods nor pileup available: just move on to next sample
-                genotypes.add(genotype);
-            } else {
-                final GenotypeBuilder gb = new GenotypeBuilder(genotype);
-                for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
-                    annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
-                }
-                genotypes.add(gb.make());
+
+            final GenotypeBuilder gb = new GenotypeBuilder(genotype);
+            for ( final GenotypeAnnotation annotation : requestedGenotypeAnnotations ) {
+                annotation.annotate(tracker, walker, ref, context, vc, genotype, gb, perReadAlleleLikelihoodMap);
            }
+            genotypes.add(gb.make());
        }

        return genotypes;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@ -109,7 +109,7 @@ import java.util.ArrayList;
@ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
@Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
@PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
-public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
+public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
    @ArgumentCollection
    private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
@ -30,7 +30,7 @@ import org.broadinstitute.sting.commandline.Advanced;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Output;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
-import org.broadinstitute.sting.gatk.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@ -27,7 +27,7 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;

 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
-import org.broadinstitute.sting.gatk.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
 import org.broadinstitute.sting.gatk.arguments.DbsnpArgumentCollection;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -125,7 +125,7 @@ import java.util.*;
 // TODO -- When LocusIteratorByState gets cleaned up, we should enable multiple @By sources:
 // TODO -- @By( {DataSource.READS, DataSource.REFERENCE_ORDERED_DATA} )
@Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
-public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatible {
+public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatible, NanoSchedulable {

    @ArgumentCollection
    private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreator.java
@ -57,7 +57,7 @@ import java.util.TreeSet;
 * is minimized across all the reads. In general, a large percent of regions requiring local realignment are due to the presence of an insertion
 * or deletion (indels) in the individual's genome with respect to the reference genome.  Such alignment artifacts result in many bases mismatching
 * the reference near the misalignment, which are easily mistaken as SNPs.  Moreover, since read mapping algorithms operate on each read independently,
- * it is impossible to place reads on the reference genome such at mismatches are minimized across all reads.  Consequently, even when some reads are
+ * it is impossible to place reads on the reference genome such that mismatches are minimized across all reads.  Consequently, even when some reads are
 * correctly mapped with indels, reads covering the indel near just the start or end of the read are often incorrectly mapped with respect the true indel,
 * also requiring realignment.  Local realignment serves to transform regions with misalignments due to indels into clean reads containing a consensus
 * indel suitable for standard variant discovery approaches.  Unlike most mappers, this walker uses the full alignment context to determine whether an
@ -69,7 +69,7 @@ import java.util.TreeSet;
 *     <li>Running the realigner over those intervals (see the IndelRealigner tool)</li>
 *     </ol>
 *     <p>
- * An important note: the input bam(s), reference, and known indel file(s) should be the same ones to be used for the IndelRealigner step.
+ * An important note: the input BAM(s), reference, and known indel file(s) should be the same ones to be used for the IndelRealigner step.
 * <p>
 * Another important note: because reads produced from the 454 technology inherently contain false indels, the realigner will not currently work with them
 * (or with reads from similar technologies).   This tool also ignores MQ0 reads and reads with consecutive indel operators in the CIGAR string.
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@ -541,7 +541,7 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
    //Get a Map of genotype likelihoods.
    //In case of null, unavailable or no call, all likelihoods are 1/3.
    private EnumMap<GenotypeType,Double> getLikelihoodsAsMapSafeNull(Genotype genotype){
-        if(genotype == null || !genotype.isCalled()){
+        if(genotype == null || !genotype.isCalled() || genotype.getLikelihoods() == null){
            EnumMap<GenotypeType,Double> likelihoods = new EnumMap<GenotypeType, Double>(GenotypeType.class);
            likelihoods.put(GenotypeType.HOM_REF,1.0/3.0);
            likelihoods.put(GenotypeType.HET,1.0/3.0);
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java
@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;

@ -40,7 +41,7 @@ import java.io.PrintStream;
 *
 */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class CountLoci extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
+public class CountLoci extends LocusWalker<Integer, Long> implements TreeReducible<Long>, NanoSchedulable {
    @Output(doc="Write count to this file instead of STDOUT")
    PrintStream out;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.GenomeLoc;
@ -73,7 +74,7 @@ import java.util.*;
 *
 */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
+public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>>, NanoSchedulable {
    @Output
    public PrintStream out;

--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java
@ -4,9 +4,9 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
 import org.broadinstitute.sting.gatk.walkers.Requires;
-import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;

@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 */
@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
@Requires({DataSource.READS, DataSource.REFERENCE})
-public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
+public class CountReads extends ReadWalker<Integer, Integer> implements NanoSchedulable {
    public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
        return 1;
    }
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/ErrorThrowing.java
@ -24,6 +24,7 @@

 package org.broadinstitute.sting.gatk.walkers.qc;

+import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Hidden;
 import org.broadinstitute.sting.commandline.Input;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
@ -45,20 +46,23 @@ public class ErrorThrowing extends RodWalker<Integer,Integer> implements TreeRed
    @Input(fullName="exception", shortName = "E", doc="Java class of exception to throw", required=true)
    public String exceptionToThrow;

+    @Argument(fullName = "failMethod", shortName = "fail", doc = "Determines which method to fail in", required = false)
+    public FailMethod failMethod = FailMethod.MAP;
+
+    public enum FailMethod {
+          MAP,
+          REDUCE,
+          TREE_REDUCE
+    }
+
    //
    // Template code to allow us to build the walker, doesn't actually do anything
    //
    @Override
    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if ( exceptionToThrow.equals("UserException") ) {
-            throw new UserException("UserException");
-        } else if ( exceptionToThrow.equals("NullPointerException") ) {
-            throw new NullPointerException();
-        } else if ( exceptionToThrow.equals("ReviewedStingException") ) {
-            throw new ReviewedStingException("ReviewedStingException");
-        } else {
-            throw new UserException.BadArgumentValue("exception", "exception isn't a recognized value " + exceptionToThrow);
-        }
+        if ( failMethod == FailMethod.MAP )
+            fail();
+        return 0;
    }

    @Override
@ -68,10 +72,32 @@ public class ErrorThrowing extends RodWalker<Integer,Integer> implements TreeRed

    @Override
    public Integer reduce(Integer value, Integer sum) {
+        if ( failMethod == FailMethod.REDUCE )
+            fail();
        return value + sum;
    }

    public Integer treeReduce(final Integer lhs, final Integer rhs) {
+        if ( failMethod == FailMethod.TREE_REDUCE )
+            fail();
        return lhs + rhs;
    }
+
+    private void fail() {
+        if ( exceptionToThrow.equals("UserException") ) {
+            throw new UserException("UserException");
+        } else if ( exceptionToThrow.equals("NullPointerException") ) {
+            throw new NullPointerException();
+        } else if ( exceptionToThrow.equals("ReviewedStingException") ) {
+            throw new ReviewedStingException("ReviewedStingException");
+        } else if ( exceptionToThrow.equals("SamError1") ) {
+            throw new RuntimeException(CommandLineGATK.PICARD_TEXT_SAM_FILE_ERROR_1);
+        } else if ( exceptionToThrow.equals("SamError2") ) {
+            throw new RuntimeException(CommandLineGATK.PICARD_TEXT_SAM_FILE_ERROR_2);
+        } else if ( exceptionToThrow.equals("NoSpace") ) {
+            throw new net.sf.samtools.util.RuntimeIOException(new java.io.IOException("No space left on device java.io.FileOutputStream.writeBytes(Native Method)"));
+        } else {
+            throw new UserException.BadArgumentValue("exception", "exception isn't a recognized value " + exceptionToThrow);
+        }
+    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/MathUtils.java
@ -75,6 +75,17 @@ public class MathUtils {
        }
    }

+    /**
+     * Get a random int between min and max (inclusive) using the global GATK random number generator
+     *
+     * @param min lower bound of the range
+     * @param max upper bound of the range
+     * @return a random int >= min and <= max
+     */
+    public static int randomIntegerInRange( int min, int max ) {
+        return GenomeAnalysisEngine.getRandomGenerator().nextInt(max - min + 1) + min;
+    }
+
    // A fast implementation of the Math.round() method.  This method does not perform
    // under/overflow checking, so this shouldn't be used in the general case (but is fine
    // if one is already make those checks before calling in to the rounding).
@ -1655,5 +1666,4 @@ public class MathUtils {
        return result;

    }
-
 }
--- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java
@ -1,18 +1,42 @@
 package org.broadinstitute.sting.utils;


+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.util.concurrent.TimeUnit;
+
 /**
- * A useful simple system for timing code.  This code is not thread safe!
+ * A useful simple system for timing code with nano second resolution
+ *
+ * Note that this code is not thread-safe.  If you have a single timer
+ * being started and stopped by multiple threads you will need to protect the
+ * calls to avoid meaningless results of having multiple starts and stops
+ * called sequentially.
 *
 * User: depristo
 * Date: Dec 10, 2010
 * Time: 9:07:44 AM
 */
 public class SimpleTimer {
-    final private String name;
-    private long elapsed = 0l;
-    private long startTime = 0l;
-    boolean running = false;
+    protected static final double NANO_TO_SECOND_DOUBLE = 1.0 / TimeUnit.SECONDS.toNanos(1);
+    private final String name;
+
+    /**
+     * The elapsedTimeNano time in nanoSeconds of this timer.  The elapsedTimeNano time is the
+     * sum of times between starts/restrats and stops.
+     */
+    private long elapsedTimeNano = 0l;
+
+    /**
+     * The start time of the last start/restart in nanoSeconds
+     */
+    private long startTimeNano = 0l;
+
+    /**
+     * Is this timer currently running (i.e., the last call was start/restart)
+     */
+    private boolean running = false;

    /**
     * Creates an anonymous simple timer
@ -25,7 +49,8 @@ public class SimpleTimer {
     * Creates a simple timer named name
     * @param name of the timer, must not be null
     */
-    public SimpleTimer(String name) {
+    public SimpleTimer(final String name) {
+        if ( name == null ) throw new IllegalArgumentException("SimpleTimer name cannot be null");
        this.name = name;
    }

@ -37,27 +62,27 @@ public class SimpleTimer {
    }

    /**
-     * Starts the timer running, and sets the elapsed time to 0.  This is equivalent to
+     * Starts the timer running, and sets the elapsedTimeNano time to 0.  This is equivalent to
     * resetting the time to have no history at all.
     *
     * @return this object, for programming convenience
     */
+    @Ensures("elapsedTimeNano == 0l")
    public synchronized SimpleTimer start() {
-        elapsed = 0l;
-        restart();
-        return this;
+        elapsedTimeNano = 0l;
+        return restart();
    }

    /**
-     * Starts the timer running, without reseting the elapsed time.  This function may be
+     * Starts the timer running, without resetting the elapsedTimeNano time.  This function may be
     * called without first calling start().  The only difference between start and restart
-     * is that start resets the elapsed time, while restart does not.
+     * is that start resets the elapsedTimeNano time, while restart does not.
     *
     * @return this object, for programming convenience
     */
    public synchronized SimpleTimer restart() {
        running = true;
-        startTime = currentTime();
+        startTimeNano = currentTimeNano();
        return this;
    }

@ -71,29 +96,53 @@ public class SimpleTimer {
    /**
     * @return A convenience function to obtain the current time in milliseconds from this timer
     */
-    public synchronized long currentTime() {
+    public long currentTime() {
        return System.currentTimeMillis();
    }

    /**
-     * Stops the timer.  Increases the elapsed time by difference between start and now.  The
-     * timer must be running in order to call stop
+     * @return A convenience function to obtain the current time in nanoSeconds from this timer
+     */
+    public long currentTimeNano() {
+        return System.nanoTime();
+    }
+
+    /**
+     * Stops the timer.  Increases the elapsedTimeNano time by difference between start and now.
+     *
+     * It's ok to call stop on a timer that's not running.  It has no effect on the timer.
     *
     * @return this object, for programming convenience
     */
+    @Requires("startTimeNano != 0l")
    public synchronized SimpleTimer stop() {
-        running = false;
-        elapsed += currentTime() - startTime;
+        if ( running ) {
+            running = false;
+            elapsedTimeNano += currentTimeNano() - startTimeNano;
+        }
        return this;
    }

    /**
-     * Returns the total elapsed time of all start/stops of this timer.  If the timer is currently
+     * Returns the total elapsedTimeNano time of all start/stops of this timer.  If the timer is currently
     * running, includes the difference from currentTime() and the start as well
     *
     * @return this time, in seconds
     */
    public synchronized double getElapsedTime() {
-        return (running ? (currentTime() - startTime + elapsed) : elapsed) / 1000.0;
+        return nanoToSecondsAsDouble(getElapsedTimeNano());
+    }
+
+    protected static double nanoToSecondsAsDouble(final long nano) {
+        return nano * NANO_TO_SECOND_DOUBLE;
+    }
+
+    /**
+     * @see #getElapsedTime() but returns the result in nanoseconds
+     *
+     * @return the elapsed time in nanoseconds
+     */
+    public synchronized long getElapsedTimeNano() {
+        return running ? (currentTimeNano() - startTimeNano + elapsedTimeNano) : elapsedTimeNano;
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@ -27,6 +27,8 @@ package org.broadinstitute.sting.utils.classloader;

 import ch.qos.logback.classic.Level;
 import ch.qos.logback.classic.Logger;
+import org.broadinstitute.sting.gatk.WalkerManager;
+import org.broadinstitute.sting.gatk.filters.FilterManager;
 import org.broadinstitute.sting.utils.exceptions.DynamicClassResolutionException;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@ -276,8 +278,16 @@ public class PluginManager<PluginType> {
     */
    public PluginType createByName(String pluginName) {
        Class<? extends PluginType> plugin = pluginsByName.get(pluginName);
-        if( plugin == null )
-            throw new UserException(formatErrorMessage(pluginCategory,pluginName));
+        if( plugin == null ) {
+            String errorMessage = formatErrorMessage(pluginCategory,pluginName);
+            if ( this.getClass().isAssignableFrom(FilterManager.class) ) {
+                throw new UserException.MalformedReadFilterException(errorMessage);
+            } else if ( this.getClass().isAssignableFrom(WalkerManager.class) ) {
+                throw new UserException.MalformedWalkerArgumentsException(errorMessage);
+            } else {
+                throw new UserException.CommandLineException(errorMessage);
+            }
+        }
        try {
            return plugin.newInstance();
        } catch (Exception e) {
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@ -63,6 +63,18 @@ public class UserException extends ReviewedStingException {
        }
    }

+    public static class MalformedReadFilterException extends CommandLineException {
+        public MalformedReadFilterException(String message) {
+            super(String.format("Malformed read filter: %s",message));
+        }
+    }
+
+    public static class MalformedWalkerArgumentsException extends CommandLineException {
+        public MalformedWalkerArgumentsException(String message) {
+            super(String.format("Malformed walker argument: %s",message));
+        }
+    }
+
    public static class MalformedGenomeLoc extends UserException {
        public MalformedGenomeLoc(String message, GenomeLoc loc) {
            super(String.format("Badly formed genome loc: %s: %s", message, loc));
@ -129,6 +141,12 @@ public class UserException extends ReviewedStingException {
        }
    }

+    public static class NoSpaceOnDevice extends UserException {
+        public NoSpaceOnDevice() {
+            super("There is no space left on the device, so writing failed");
+        }
+    }
+
    public static class CouldNotReadInputFile extends UserException {
        public CouldNotReadInputFile(String message, Exception e) {
            super(String.format("Couldn't read file because %s caused by %s", message, getMessage(e)));
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/BlockingQueueValue.java
@ -0,0 +1,82 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+import com.google.java.contract.Invariant;
+
+/**
+ * Wrapper to hold data for a blocking queue, distinguishing an EOF marker from a real object
+ *
+ * The only way to tell in a consumer thread that a blocking queue has no more data ever
+ * coming down the pipe is to pass in a "poison" or EOF object.  This class provides
+ * a generic capacity for that...
+ *
+ * The use case looks like this:
+ *
+ * BlockingQueue q
+ * producer:
+ *   while ( x has items )
+ *      q.put(new BlockingQueueValue(x))
+ *   q.put(new BlockingQueueValue())
+ *
+ * Consumer:
+ *   while ( true )
+ *       value = q.take()
+ *       if ( value.isLast() )
+ *          break
+ *       else
+ *          do something useful with value
+ *
+ *
+ * User: depristo
+ * Date: 9/6/12
+ * Time: 3:08 PM
+ */
+@Invariant("! isLast || value == null")
+class BlockingQueueValue<T> {
+    /**
+     * True if this is the EOF marker object
+     */
+    final private boolean isLast;
+
+    /**
+     * Our value, if we aren't the EOF marker
+     */
+    final private T value;
+
+    /**
+     * Create a new BlockingQueueValue containing a real value, where last is false
+     * @param value
+     */
+    BlockingQueueValue(final T value) {
+        isLast = false;
+        this.value = value;
+    }
+
+    /**
+     * Create a new BlockingQueueValue that is the last item
+     */
+    BlockingQueueValue() {
+        isLast = true;
+        this.value = null;
+    }
+
+    /**
+     * Is this the EOF marker?
+     *
+     * @return true if so, else false
+     */
+    public boolean isLast() {
+        return isLast;
+    }
+
+    /**
+     * Get the value held by this BlockingQueueValue
+     *
+     * @return the value
+     * @throws IllegalStateException if this is the last item
+     */
+    public T getValue() {
+        if ( isLast() )
+            throw new IllegalStateException("Cannot get value for last object");
+        return value;
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/FutureValue.java
@ -0,0 +1,45 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.TimeoutException;
+
+/**
+ * Create a future that simply returns a given value
+ *
+ * The only standard way to create a future in java is via the ExecutorService interface.
+ * If you have a data structure holding futures of value T, and you want to add a
+ * value to it for some reason (to add a EOF marker, for instance) you can use this
+ * class to create a dummy Future<T> that simply returns a value.
+ *
+ * @author depristo
+ * @since 09/12
+ */
+class FutureValue<V> implements Future<V> {
+    final V value;
+
+    FutureValue(final V value) {
+        this.value = value;
+    }
+
+    @Override public boolean cancel(boolean mayInterruptIfRunning) {
+        return true;
+    }
+
+    @Override public boolean isCancelled() {
+        return false;
+    }
+
+    @Override public boolean isDone() {
+        return true;
+    }
+
+    @Override public V get() throws InterruptedException, ExecutionException {
+        return value;
+    }
+
+    @Override public V get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException {
+        return get();
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
@ -0,0 +1,62 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+import org.broadinstitute.sting.utils.SimpleTimer;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.Iterator;
+import java.util.concurrent.BlockingQueue;
+
+/**
+ * Producer Thread that reads input values from an inputReads and puts them into a BlockingQueue
+ */
+class InputProducer<InputType> implements Runnable {
+    /**
+     * The iterator we are using to get data from
+     */
+    final Iterator<InputType> inputReader;
+
+    /**
+     * Our timer (may be null) that we use to track our input costs
+     */
+    final SimpleTimer inputTimer;
+
+    /**
+     * Where we put our input values for consumption
+     */
+    final BlockingQueue<InputValue> outputQueue;
+
+    public InputProducer(final Iterator<InputType> inputReader,
+                         final SimpleTimer inputTimer,
+                         final BlockingQueue<InputValue> outputQueue) {
+        if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
+        if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
+
+        this.inputReader = inputReader;
+        this.inputTimer = inputTimer;
+        this.outputQueue = outputQueue;
+    }
+
+    public void run() {
+        try {
+            while ( inputReader.hasNext() ) {
+                if ( inputTimer != null ) inputTimer.restart();
+                final InputType input = inputReader.next();
+                if ( inputTimer != null ) inputTimer.stop();
+                outputQueue.put(new InputValue(input));
+            }
+
+            // add the EOF object so our consumer knows we are done in all inputs
+            outputQueue.put(new InputValue());
+        } catch (InterruptedException ex) {
+            throw new ReviewedStingException("got execution exception", ex);
+        }
+    }
+
+    /**
+     * Helper class that contains a read value suitable for EOF marking in a BlockingQueue
+     */
+    class InputValue extends BlockingQueueValue<InputType> {
+        private InputValue(InputType datum) { super(datum); }
+        private InputValue() { }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapResult.java
@ -0,0 +1,36 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+/**
+ * Holds the results of a map job suitable for producer/consumer threading
+ * via a BlockingQueue
+ */
+class MapResult<MapType> extends BlockingQueueValue<MapType> {
+    final int jobID;
+
+    /**
+     * Create a new MapResult with value datum and jod jobID ID
+     *
+     * @param datum the value produced by the map job
+     * @param jobID the id of the map job (for correctness testing)
+     */
+    MapResult(final MapType datum, final int jobID) {
+        super(datum);
+        this.jobID = jobID;
+        if ( jobID < 0 ) throw new IllegalArgumentException("JobID must be >= 0");
+    }
+
+    /**
+     * Create the EOF marker version of MapResult
+     */
+    MapResult() {
+        super();
+        this.jobID = Integer.MAX_VALUE;
+    }
+
+    /**
+     * @return the job ID of the map job that produced this MapResult
+     */
+    public int getJobID() {
+        return jobID;
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSMapFunction.java
@ -9,7 +9,7 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 * Date: 8/24/12
 * Time: 9:49 AM
 */
-public interface MapFunction<InputType, ResultType> {
+public interface NSMapFunction<InputType, ResultType> {
    /**
     * Return function on input, returning a value of ResultType
     * @param input
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSProgressFunction.java
@ -0,0 +1,12 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: depristo
+ * Date: 9/4/12
+ * Time: 2:10 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public interface NSProgressFunction<InputType> {
+    public void progress(final InputType lastMapInput);
+}
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSReduceFunction.java
@ -7,7 +7,7 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 * Date: 8/24/12
 * Time: 9:49 AM
 */
-public interface ReduceFunction<MapType, ReduceType> {
+public interface NSReduceFunction<MapType, ReduceType> {
    /**
     * Combine one with sum into a new ReduceType
     * @param one the result of a map call on an input element
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@ -3,13 +3,13 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.AutoFormattingTime;
+import org.broadinstitute.sting.utils.SimpleTimer;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.threading.NamedThreadFactory;

 import java.util.Iterator;
-import java.util.LinkedList;
 import java.util.List;
-import java.util.Queue;
 import java.util.concurrent.*;

 /**
@ -17,12 +17,12 @@ import java.util.concurrent.*;
 *
 * The overall framework works like this
 *
- * nano <- new Nanoschedule(bufferSize, numberOfMapElementsToProcessTogether, nThreads)
+ * nano <- new Nanoschedule(inputBufferSize, numberOfMapElementsToProcessTogether, nThreads)
 * List[Input] outerData : outerDataLoop )
 *   result = nano.execute(outerData.iterator(), map, reduce)
 *
- * bufferSize determines how many elements from the input stream are read in one go by the
- * nanoscheduler.  The scheduler may hold up to bufferSize in memory at one time, as well
+ * inputBufferSize determines how many elements from the input stream are read in one go by the
+ * nanoscheduler.  The scheduler may hold up to inputBufferSize in memory at one time, as well
 * as up to inputBufferSize map results as well.
 *
 * numberOfMapElementsToProcessTogether determines how many input elements are processed
@ -45,42 +45,54 @@ import java.util.concurrent.*;
 public class NanoScheduler<InputType, MapType, ReduceType> {
    private final static Logger logger = Logger.getLogger(NanoScheduler.class);
    private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
+    private final static boolean LOG_MAP_TIMES = false;
+    private final static boolean TIME_CALLS = true;

-    final int bufferSize;
-    final int mapGroupSize;
+    private final static int MAP_BUFFER_SIZE_SCALE_FACTOR = 100;
+
+    final int inputBufferSize;
+    final int mapBufferSize;
    final int nThreads;
-    final ExecutorService executor;
+    final ExecutorService inputExecutor;
+    final ExecutorService reduceExecutor;
+    final ThreadPoolExecutor mapExecutor;
+
    boolean shutdown = false;
    boolean debug = false;
+    private NSProgressFunction<InputType> progressFunction = null;
+
+    final SimpleTimer outsideSchedulerTimer = TIME_CALLS ? new SimpleTimer("outside") : null;
+    final SimpleTimer inputTimer = TIME_CALLS ? new SimpleTimer("input") : null;
+    final SimpleTimer mapTimer = TIME_CALLS ? new SimpleTimer("map") : null;
+    final SimpleTimer reduceTimer = TIME_CALLS ? new SimpleTimer("reduce") : null;

    /**
-     * Create a new nanoschedule with the desire characteristics requested by the argument
+     * Create a new nanoscheduler with the desire characteristics requested by the argument
     *
-     * @param bufferSize the number of input elements to read in each scheduling cycle.
-     * @param mapGroupSize How many inputs should be grouped together per map?  If -1 we make a reasonable guess
-     * @param nThreads the number of threads to use to get work done, in addition to the thread calling execute
+     * @param inputBufferSize the number of input elements to read in each scheduling cycle.
+     * @param nThreads the number of threads to use to get work done, in addition to the
+     *                 thread calling execute
     */
-    public NanoScheduler(final int bufferSize,
-                         final int mapGroupSize,
-                         final int nThreads) {
-        if ( bufferSize < 1 ) throw new IllegalArgumentException("bufferSize must be >= 1, got " + bufferSize);
+    public NanoScheduler(final int inputBufferSize, final int nThreads) {
+        if ( inputBufferSize < 1 ) throw new IllegalArgumentException("inputBufferSize must be >= 1, got " + inputBufferSize);
        if ( nThreads < 1 ) throw new IllegalArgumentException("nThreads must be >= 1, got " + nThreads);

-        if ( mapGroupSize > bufferSize ) throw new IllegalArgumentException("mapGroupSize " + mapGroupSize + " must be <= bufferSize " + bufferSize);
-        if ( mapGroupSize == 0 || mapGroupSize < -1 ) throw new IllegalArgumentException("mapGroupSize cannot be <= 0" + mapGroupSize);
-
-        this.bufferSize = bufferSize;
+        this.inputBufferSize = inputBufferSize;
+        this.mapBufferSize = inputBufferSize * MAP_BUFFER_SIZE_SCALE_FACTOR;
        this.nThreads = nThreads;

-        if ( mapGroupSize == -1 ) {
-            this.mapGroupSize = (int)Math.ceil(this.bufferSize / (10.0*this.nThreads));
-            logger.info(String.format("Dynamically setting grouping size to %d based on buffer size %d and n threads %d",
-                    this.mapGroupSize, this.bufferSize, this.nThreads));
+        if ( nThreads == 1 ) {
+            this.mapExecutor = null;
+            this.inputExecutor = this.reduceExecutor = null;
        } else {
-            this.mapGroupSize = mapGroupSize;
+            this.mapExecutor = (ThreadPoolExecutor)Executors.newFixedThreadPool(nThreads-1, new NamedThreadFactory("NS-map-thread-%d"));
+            this.mapExecutor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
+            this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
+            this.reduceExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-reduce-thread-%d"));
        }

-        this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
+        // start timing the time spent outside of the nanoScheduler
+        outsideSchedulerTimer.start();
    }

    /**
@ -97,17 +109,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     * @return
     */
    @Ensures("result > 0")
-    public int getBufferSize() {
-        return bufferSize;
-    }
-
-    /**
-     * The grouping size used by this NanoScheduler
-     * @return
-     */
-    @Ensures("result > 0")
-    public int getMapGroupSize() {
-        return mapGroupSize;
+    public int getInputBufferSize() {
+        return inputBufferSize;
    }

    /**
@ -116,12 +119,54 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     * After this call, execute cannot be invoked without throwing an error
     */
    public void shutdown() {
-        if ( executor != null ) {
-            final List<Runnable> remaining = executor.shutdownNow();
-            if ( ! remaining.isEmpty() )
-                throw new IllegalStateException("Remaining tasks found in the executor, unexpected behavior!");
+        outsideSchedulerTimer.stop();
+
+        if ( nThreads > 1 ) {
+            shutdownExecutor("inputExecutor", inputExecutor);
+            shutdownExecutor("mapExecutor", mapExecutor);
+            shutdownExecutor("reduceExecutor", reduceExecutor);
        }
        shutdown = true;
+
+        if (TIME_CALLS) {
+            printTimerInfo("Input   time", inputTimer);
+            printTimerInfo("Map     time", mapTimer);
+            printTimerInfo("Reduce  time", reduceTimer);
+            printTimerInfo("Outside time", outsideSchedulerTimer);
+        }
+    }
+
+    /**
+     * Helper function to cleanly shutdown an execution service, checking that the execution
+     * state is clean when it's done.
+     *
+     * @param name a string name for error messages for the executorService we are shutting down
+     * @param executorService the executorService to shut down
+     */
+    @Requires({"name != null", "executorService != null"})
+    @Ensures("executorService.isShutdown()")
+    private void shutdownExecutor(final String name, final ExecutorService executorService) {
+        if ( executorService.isShutdown() || executorService.isTerminated() )
+            throw new IllegalStateException("Executor service " + name + " is already shut down!");
+
+        final List<Runnable> remaining = executorService.shutdownNow();
+        if ( ! remaining.isEmpty() )
+            throw new IllegalStateException(remaining.size() + " remaining tasks found in an executor " + name + ", unexpected behavior!");
+    }
+
+    /**
+     * Print to logger.info timing information from timer, with name label
+     *
+     * @param label the name of the timer to display.  Should be human readable
+     * @param timer the timer whose elapsed time we will display
+     */
+    @Requires({"label != null", "timer != null"})
+    private void printTimerInfo(final String label, final SimpleTimer timer) {
+        final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime()
+                + reduceTimer.getElapsedTime() + outsideSchedulerTimer.getElapsedTime();
+        final double myTimeInSec = timer.getElapsedTime();
+        final double myTimePercent = myTimeInSec / total * 100;
+        logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
    }

    /**
@ -131,20 +176,45 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
        return shutdown;
    }

+    /**
+     * @return are we displaying verbose debugging information about the scheduling?
+     */
    public boolean isDebug() {
        return debug;
    }

+    /**
+     * Helper function to display a String.formatted message if we are doing verbose debugging
+     *
+     * @param format the format argument suitable for String.format
+     * @param args the arguments for String.format
+     */
+    @Requires("format != null")
    private void debugPrint(final String format, Object ... args) {
        if ( isDebug() )
            logger.info("Thread " + Thread.currentThread().getId() + ":" + String.format(format, args));
    }

-
+    /**
+     * Turn on/off verbose debugging
+     *
+     * @param debug true if we want verbose debugging
+     */
    public void setDebug(boolean debug) {
        this.debug = debug;
    }

+    /**
+     * Set the progress callback function to progressFunction
+     *
+     * The progress callback is invoked after each buffer size elements have been processed by map/reduce
+     *
+     * @param progressFunction a progress function to call, or null if you don't want any progress callback
+     */
+    public void setProgressFunction(final NSProgressFunction<InputType> progressFunction) {
+        this.progressFunction = progressFunction;
+    }
+
    /**
     * Execute a map/reduce job with this nanoScheduler
     *
@ -159,41 +229,73 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     * It is safe to call this function repeatedly on a single nanoScheduler, at least until the
     * shutdown method is called.
     *
-     * @param inputReader
-     * @param map
-     * @param reduce
-     * @return
+     * Note that this function goes through a single threaded fast path if the number of threads
+     * is 1.
+     *
+     * @param inputReader an iterator providing us with the input data to nanoSchedule map/reduce over
+     * @param map the map function from input type -> map type, will be applied in parallel to each input
+     * @param reduce the reduce function from map type + reduce type -> reduce type to be applied in order to map results
+     * @return the last reduce value
     */
    public ReduceType execute(final Iterator<InputType> inputReader,
-                              final MapFunction<InputType, MapType> map,
+                              final NSMapFunction<InputType, MapType> map,
                              final ReduceType initialValue,
-                              final ReduceFunction<MapType, ReduceType> reduce) {
+                              final NSReduceFunction<MapType, ReduceType> reduce) {
        if ( isShutdown() ) throw new IllegalStateException("execute called on already shutdown NanoScheduler");
        if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
        if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
        if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");

+        outsideSchedulerTimer.stop();
+
+        ReduceType result;
        if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
-            return executeSingleThreaded(inputReader, map, initialValue, reduce);
+            result = executeSingleThreaded(inputReader, map, initialValue, reduce);
        } else {
-            return executeMultiThreaded(inputReader, map, initialValue, reduce);
+            result = executeMultiThreaded(inputReader, map, initialValue, reduce);
        }
+
+        outsideSchedulerTimer.restart();
+        return result;
    }

    /**
-     * Simple efficient reference implementation for single threaded execution
+     * Simple efficient reference implementation for single threaded execution.
+     *
     * @return the reduce result of this map/reduce job
     */
+    @Requires({"inputReader != null", "map != null", "reduce != null"})
    private ReduceType executeSingleThreaded(final Iterator<InputType> inputReader,
-                                             final MapFunction<InputType, MapType> map,
+                                             final NSMapFunction<InputType, MapType> map,
                                             final ReduceType initialValue,
-                                             final ReduceFunction<MapType, ReduceType> reduce) {
+                                             final NSReduceFunction<MapType, ReduceType> reduce) {
        ReduceType sum = initialValue;
+        int i = 0;
+
+        // start timer to ensure that both hasNext and next are caught by the timer
+        if ( TIME_CALLS ) inputTimer.restart();
        while ( inputReader.hasNext() ) {
            final InputType input = inputReader.next();
+            if ( TIME_CALLS ) inputTimer.stop();
+
+            // map
+            if ( TIME_CALLS ) mapTimer.restart();
+            final long preMapTime = LOG_MAP_TIMES ? 0 : mapTimer.currentTimeNano();
            final MapType mapValue = map.apply(input);
+            if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (mapTimer.currentTimeNano() - preMapTime));
+            if ( TIME_CALLS ) mapTimer.stop();
+
+            if ( i++ % inputBufferSize == 0 && progressFunction != null )
+                progressFunction.progress(input);
+
+            // reduce
+            if ( TIME_CALLS ) reduceTimer.restart();
            sum = reduce.apply(mapValue, sum);
+            if ( TIME_CALLS ) reduceTimer.stop();
+
+            if ( TIME_CALLS ) inputTimer.restart();
        }
+
        return sum;
    }

@ -202,101 +304,89 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     *
     * @return the reduce result of this map/reduce job
     */
+    @Requires({"inputReader != null", "map != null", "reduce != null"})
    private ReduceType executeMultiThreaded(final Iterator<InputType> inputReader,
-                                            final MapFunction<InputType, MapType> map,
+                                            final NSMapFunction<InputType, MapType> map,
                                            final ReduceType initialValue,
-                                            final ReduceFunction<MapType, ReduceType> reduce) {
+                                            final NSReduceFunction<MapType, ReduceType> reduce) {
        debugPrint("Executing nanoScheduler");
-        ReduceType sum = initialValue;
-        while ( inputReader.hasNext() ) {
-            try {
-                // read in our input values
-                final List<InputType> inputs = readInputs(inputReader);

-                // send jobs for map
-                final Queue<Future<List<MapType>>> mapQueue = submitMapJobs(map, executor, inputs);
+        // a blocking queue that limits the number of input datum to the requested buffer size
+        final BlockingQueue<InputProducer<InputType>.InputValue> inputQueue
+                = new LinkedBlockingDeque<InputProducer<InputType>.InputValue>(inputBufferSize);

-                // send off the reduce job, and block until we get at least one reduce result
-                sum = reduceParallel(reduce, mapQueue, sum);
-            } catch (InterruptedException ex) {
-                throw new ReviewedStingException("got execution exception", ex);
-            } catch (ExecutionException ex) {
-                throw new ReviewedStingException("got execution exception", ex);
+        // a priority queue that stores up to mapBufferSize elements
+        // produced by completed map jobs.
+        final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue =
+                new LinkedBlockingDeque<Future<MapResult<MapType>>>(mapBufferSize);
+
+        // Start running the input reader thread
+        inputExecutor.submit(new InputProducer<InputType>(inputReader, inputTimer, inputQueue));
+
+        // Start running the reducer thread
+        final ReducerThread<MapType, ReduceType> reducer
+                = new ReducerThread<MapType, ReduceType>(reduce, reduceTimer, initialValue, mapResultQueue);
+        final Future<ReduceType> reduceResult = reduceExecutor.submit(reducer);
+
+        try {
+            int numJobs = 0;
+
+            while ( true ) {
+                // block on input
+                final InputProducer<InputType>.InputValue inputEnqueueWrapped = inputQueue.take();
+
+                if ( ! inputEnqueueWrapped.isLast() ) {
+                    // get the object itself
+                    final InputType input = inputEnqueueWrapped.getValue();
+
+                    // the next map call has jobID + 1
+                    numJobs++;
+
+                    // send job for map via the completion service
+                    final CallableMap doMap = new CallableMap(map, numJobs, input);
+                    final Future<MapResult<MapType>> mapJob = mapExecutor.submit(doMap);
+                    mapResultQueue.put(mapJob);
+
+                    debugPrint("  Done with cycle of map/reduce");
+
+                    if ( numJobs % inputBufferSize == 0 && progressFunction != null )
+                        progressFunction.progress(input);
+                } else {
+                    mapResultQueue.put(new FutureValue<MapResult<MapType>>(new MapResult<MapType>()));
+                    return reduceResult.get(); // wait for our result of reduce
+                }
            }
+        } catch (InterruptedException ex) {
+            throw new ReviewedStingException("got execution exception", ex);
+        } catch (ExecutionException ex) {
+            throw new ReviewedStingException("got execution exception", ex);
        }
-
-        return sum;
-    }
-
-    @Requires({"reduce != null", "! mapQueue.isEmpty()"})
-    private ReduceType reduceParallel(final ReduceFunction<MapType, ReduceType> reduce,
-                                      final Queue<Future<List<MapType>>> mapQueue,
-                                      final ReduceType initSum)
-            throws InterruptedException, ExecutionException {
-        ReduceType sum = initSum;
-
-        // while mapQueue has something in it to reduce
-        for ( final Future<List<MapType>> future : mapQueue ) {
-            for ( final MapType value : future.get() ) // block until we get the values for this task
-                sum = reduce.apply(value, sum);
-        }
-
-        return sum;
-    }
-
-    /**
-     * Read up to inputBufferSize elements from inputReader
-     *
-     * @return a queue of inputs read in, containing one or more values of InputType read in
-     */
-    @Requires("inputReader.hasNext()")
-    @Ensures("!result.isEmpty()")
-    private List<InputType> readInputs(final Iterator<InputType> inputReader) {
-        int n = 0;
-        final List<InputType> inputs = new LinkedList<InputType>();
-        while ( inputReader.hasNext() && n < getBufferSize() ) {
-            final InputType input = inputReader.next();
-            inputs.add(input);
-            n++;
-        }
-        return inputs;
-    }
-
-    @Requires({"map != null", "! inputs.isEmpty()"})
-    private Queue<Future<List<MapType>>> submitMapJobs(final MapFunction<InputType, MapType> map,
-                                                       final ExecutorService executor,
-                                                       final List<InputType> inputs) {
-        final Queue<Future<List<MapType>>> mapQueue = new LinkedList<Future<List<MapType>>>();
-
-        for ( final List<InputType> subinputs : Utils.groupList(inputs, getMapGroupSize()) ) {
-            final CallableMap doMap = new CallableMap(map, subinputs);
-            final Future<List<MapType>> future = executor.submit(doMap);
-            mapQueue.add(future);
-        }
-
-        return mapQueue;
    }

    /**
     * A simple callable version of the map function for use with the executor pool
     */
-    private class CallableMap implements Callable<List<MapType>> {
-        final List<InputType> inputs;
-        final MapFunction<InputType, MapType> map;
+    private class CallableMap implements Callable<MapResult<MapType>> {
+        final int id;
+        final InputType input;
+        final NSMapFunction<InputType, MapType> map;

-        @Requires({"map != null", "inputs.size() <= getMapGroupSize()"})
-        private CallableMap(final MapFunction<InputType, MapType> map, final List<InputType> inputs) {
-            this.inputs = inputs;
+        @Requires({"map != null"})
+        private CallableMap(final NSMapFunction<InputType, MapType> map,
+                            final int id,
+                            final InputType input) {
+            this.id = id;
+            this.input = input;
            this.map = map;
        }

-        @Ensures("result.size() == inputs.size()")
-        @Override public List<MapType> call() throws Exception {
-            final List<MapType> outputs = new LinkedList<MapType>();
-            for ( final InputType input : inputs )
-                outputs.add(map.apply(input));
-            debugPrint("    Processed %d elements with map", outputs.size());
-            return outputs;
+        @Override
+        public MapResult<MapType> call() {
+            if ( TIME_CALLS ) mapTimer.restart();
+            if ( debug ) debugPrint("\t\tmap " + input);
+            final MapType result = map.apply(input);
+            if ( TIME_CALLS ) mapTimer.stop();
+            return new MapResult<MapType>(result, id);
        }
    }
 }
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReducerThread.java
@ -0,0 +1,65 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+import org.broadinstitute.sting.utils.SimpleTimer;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.Future;
+
+/**
+ * Thread that runs the reduce of the map/reduce.
+ *
+ * This thread reads from mapResultsQueue until the poison EOF object arrives.  At each
+ * stage is calls reduce(value, sum).  The blocking mapResultQueue ensures that the
+ * queue waits until the mapResultQueue has a value to take. Then, it gets and waits
+ * until the map result Future has a value.
+ */
+class ReducerThread<MapType, ReduceType> implements Callable<ReduceType> {
+    final NSReduceFunction<MapType, ReduceType> reduce;
+    final SimpleTimer reduceTimer;
+    final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue;
+
+    ReduceType sum;
+    int lastJobID = -1;
+
+    public ReducerThread(final NSReduceFunction<MapType, ReduceType> reduce,
+                         final SimpleTimer reduceTimer,
+                         final ReduceType sum,
+                         final BlockingQueue<Future<MapResult<MapType>>> mapResultQueue) {
+        if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
+        if ( mapResultQueue == null ) throw new IllegalArgumentException("mapResultQueue cannot be null");
+
+        this.reduce = reduce;
+        this.reduceTimer = reduceTimer;
+        this.sum = sum;
+        this.mapResultQueue = mapResultQueue;
+    }
+
+    public ReduceType call() {
+        try {
+            while ( true ) {
+                final MapResult<MapType> result = mapResultQueue.take().get();
+                if ( result.isLast() ) {
+                    // we are done, just return sum
+                    return sum;
+                }
+                else if ( result.getJobID() < lastJobID ) {
+                    // make sure the map results are coming in order
+                    throw new IllegalStateException("BUG: last jobID " + lastJobID + " > current jobID " + result.getJobID());
+                } else {
+                    lastJobID = result.getJobID();
+                    // apply reduce, keeping track of sum
+                    if ( reduceTimer != null ) reduceTimer.restart();
+                    sum = reduce.apply(result.getValue(), sum);
+                    if ( reduceTimer != null ) reduceTimer.stop();
+                }
+            }
+        } catch (ExecutionException ex) {
+            throw new ReviewedStingException("got execution exception", ex);
+        } catch (InterruptedException ex) {
+            throw new ReviewedStingException("got execution exception", ex);
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@ -613,6 +613,8 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
    /**
     * Returns a pileup randomly downsampled to the desiredCoverage.
     *
+     * TODO: delete this once the experimental downsampler stabilizes
+     *
     * @param desiredCoverage
     * @return
     */
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialMultiSampleReadStream.java
@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.picard.sam.MergingSamRecordIterator;
+import net.sf.picard.sam.SamFileHeaderMerger;
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.*;
+
+/**
+ * Simple wrapper class that multiplexes multiple ArtificialSingleSampleReadStreams into a single stream of reads
+ *
+ * @author David Roazen
+ */
+public class ArtificialMultiSampleReadStream implements Iterable<SAMRecord> {
+
+    private Collection<ArtificialSingleSampleReadStream> perSampleArtificialReadStreams;
+    private MergingSamRecordIterator mergingIterator;
+
+    public ArtificialMultiSampleReadStream( Collection<ArtificialSingleSampleReadStream> perSampleArtificialReadStreams ) {
+        if ( perSampleArtificialReadStreams == null || perSampleArtificialReadStreams.isEmpty() ) {
+            throw new ReviewedStingException("Can't create an ArtificialMultiSampleReadStream out of 0 ArtificialSingleSampleReadStreams");
+        }
+
+        this.perSampleArtificialReadStreams = perSampleArtificialReadStreams;
+    }
+
+    public Iterator<SAMRecord> iterator() {
+        // lazy initialization to prevent reads from being created until they're needed
+        initialize();
+
+        return mergingIterator;
+    }
+
+    public StingSAMIterator getStingSAMIterator() {
+        // lazy initialization to prevent reads from being created until they're needed
+        initialize();
+
+        return StingSAMIteratorAdapter.adapt(mergingIterator);
+    }
+
+    private void initialize() {
+        Collection<SAMFileReader> perSampleSAMReaders = new ArrayList<SAMFileReader>(perSampleArtificialReadStreams.size());
+        Collection<SAMFileHeader> headers = new ArrayList<SAMFileHeader>(perSampleArtificialReadStreams.size());
+
+        for ( ArtificialSingleSampleReadStream readStream : perSampleArtificialReadStreams ) {
+            Collection<SAMRecord> thisStreamReads = readStream.makeReads();
+
+            SAMFileReader reader = new ArtificialSAMFileReader(readStream.getHeader(),
+                                                               thisStreamReads.toArray(new SAMRecord[thisStreamReads.size()]));
+            perSampleSAMReaders.add(reader);
+            headers.add(reader.getFileHeader());
+        }
+
+        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate, headers, true);
+        mergingIterator = new MergingSamRecordIterator(headerMerger, perSampleSAMReaders, true);
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMFileReader.java
@ -40,8 +40,11 @@ public class ArtificialSAMFileReader extends SAMFileReader {
     */
    private final List<SAMRecord> reads;

+    private SAMFileHeader customHeader = null;
+
    /**
     * Construct an artificial SAM file reader.
+     * @param sequenceDictionary sequence dictionary used to initialize our GenomeLocParser
     * @param reads Reads to use as backing data source.
     */
    public ArtificialSAMFileReader(SAMSequenceDictionary sequenceDictionary,SAMRecord... reads) {
@ -50,6 +53,30 @@ public class ArtificialSAMFileReader extends SAMFileReader {
        this.reads = Arrays.asList(reads);
    }

+    /**
+     * Construct an artificial SAM file reader with the given SAM file header
+     *
+     * @param customHeader Header that should be returned by calls to getFileHeader() on this reader
+     * @param reads Reads to use as backing data source.
+     */
+    public ArtificialSAMFileReader( SAMFileHeader customHeader, SAMRecord... reads ) {
+        super(createEmptyInputStream(),true);
+
+        this.customHeader = customHeader;
+        this.genomeLocParser = new GenomeLocParser(customHeader.getSequenceDictionary());
+        this.reads = Arrays.asList(reads);
+    }
+
+
+    @Override
+    public SAMFileHeader getFileHeader() {
+        if ( customHeader != null ) {
+            return customHeader;
+        }
+
+        return super.getFileHeader();
+    }
+
    /**
     * @{inheritDoc}
     */
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSAMUtils.java
@ -276,6 +276,30 @@ public class ArtificialSAMUtils {
        return Arrays.asList(left, right);
    }

+    /**
+     * Create a collection of identical artificial reads based on the parameters.  The cigar string for each
+     * read will be *M, where * is the length of the read.
+     *
+     * Useful for testing things like positional downsampling where you care only about the position and
+     * number of reads, and not the other attributes.
+     *
+     * @param stackSize      number of identical reads to create
+     * @param header         the SAM header to associate each read with
+     * @param name           name associated with each read
+     * @param refIndex       the reference index, i.e. what chromosome to associate them with
+     * @param alignmentStart where to start each alignment
+     * @param length         the length of each read
+     *
+     * @return a collection of stackSize reads all sharing the above properties
+     */
+    public static Collection<GATKSAMRecord> createStackOfIdenticalArtificialReads( int stackSize, SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
+        Collection<GATKSAMRecord> stack = new ArrayList<GATKSAMRecord>(stackSize);
+        for ( int i = 1; i <= stackSize; i++ ) {
+            stack.add(createArtificialRead(header, name, refIndex, alignmentStart, length));
+        }
+        return stack;
+    }
+
    /**
     * create an iterator containing the specified read piles
     *
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStream.java
@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Iterator;
+
+/**
+ * An artificial stream of reads from a single read group/sample with configurable characteristics
+ * such as:
+ *
+ * -the number of contigs that the reads should be distributed across
+ * -number of "stacks" of reads sharing the same alignment start position per contig
+ * -the min/max number of reads in each stack (exact values chosen randomly from this range)
+ * -the min/max distance between stack start positions (exact values chosen randomly from this range)
+ * -the min/max length of each read (exact values chosen randomly from this range)
+ * -the number of unmapped reads
+ *
+ * The cigar string for all reads will be *M, where * is the length of the read.
+ *
+ * @author David Roazen
+ */
+public class ArtificialSingleSampleReadStream implements Iterable<SAMRecord> {
+    private SAMFileHeader header;
+    private String readGroupID;
+    private int numContigs;
+    private int numStacksPerContig;
+    private int minReadsPerStack;
+    private int maxReadsPerStack;
+    private int minDistanceBetweenStacks;
+    private int maxDistanceBetweenStacks;
+    private int minReadLength;
+    private int maxReadLength;
+    private int numUnmappedReads;
+
+    private static final String READ_GROUP_TAG = "RG";
+
+    public ArtificialSingleSampleReadStream( SAMFileHeader header,
+                                             String readGroupID,
+                                             int numContigs,
+                                             int numStacksPerContig,
+                                             int minReadsPerStack,
+                                             int maxReadsPerStack,
+                                             int minDistanceBetweenStacks,
+                                             int maxDistanceBetweenStacks,
+                                             int minReadLength,
+                                             int maxReadLength,
+                                             int numUnmappedReads ) {
+        this.header = header;
+        this.readGroupID = readGroupID;
+        this.numContigs = numContigs;
+        this.numStacksPerContig = numStacksPerContig;
+        this.minReadsPerStack = minReadsPerStack;
+        this.maxReadsPerStack = maxReadsPerStack;
+        this.minDistanceBetweenStacks = minDistanceBetweenStacks;
+        this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
+        this.minReadLength = minReadLength;
+        this.maxReadLength = maxReadLength;
+        this.numUnmappedReads = numUnmappedReads;
+
+        validateStreamParameters();
+    }
+
+    private void validateStreamParameters() {
+        if ( header == null || readGroupID == null ) {
+            throw new ReviewedStingException("null SAMFileHeader or read group ID") ;
+        }
+
+        if ( header.getReadGroup(readGroupID) == null ) {
+            throw new ReviewedStingException(String.format("Read group %s not found in SAMFileHeader", readGroupID));
+        }
+
+        if ( numContigs < 0 || numStacksPerContig < 0 || minReadsPerStack < 0 || maxReadsPerStack < 0 ||
+             minDistanceBetweenStacks < 0 || maxDistanceBetweenStacks < 0 || minReadLength < 0 || maxReadLength < 0 ||
+             numUnmappedReads < 0 ) {
+            throw new ReviewedStingException("Read stream parameters must be >= 0");
+        }
+
+        if ( (numContigs == 0 && numStacksPerContig != 0) || (numContigs != 0 && numStacksPerContig == 0) ) {
+            throw new ReviewedStingException("numContigs and numStacksPerContig must either both be > 0, or both be 0");
+        }
+
+        if ( minReadsPerStack > maxReadsPerStack ) {
+            throw new ReviewedStingException("minReadsPerStack > maxReadsPerStack");
+        }
+
+        if ( minDistanceBetweenStacks > maxDistanceBetweenStacks ) {
+            throw new ReviewedStingException("minDistanceBetweenStacks > maxDistanceBetweenStacks");
+        }
+
+        if ( minReadLength > maxReadLength ) {
+            throw new ReviewedStingException("minReadLength > maxReadLength");
+        }
+    }
+
+    public Iterator<SAMRecord> iterator() {
+        return makeReads().iterator();
+    }
+
+    public StingSAMIterator getStingSAMIterator() {
+        return StingSAMIteratorAdapter.adapt(iterator());
+    }
+
+    public Collection<SAMRecord> makeReads() {
+        Collection<SAMRecord> reads = new ArrayList<SAMRecord>(numContigs * numStacksPerContig * maxReadsPerStack);
+
+        for ( int contig = 0; contig < numContigs; contig++ ) {
+            int alignmentStart = 1;
+
+            for ( int stack = 0; stack < numStacksPerContig; stack++ ) {
+                reads.addAll(makeReadStack(contig, alignmentStart, MathUtils.randomIntegerInRange(minReadsPerStack, maxReadsPerStack)));
+                alignmentStart += MathUtils.randomIntegerInRange(minDistanceBetweenStacks, maxDistanceBetweenStacks);
+            }
+        }
+
+        if ( numUnmappedReads > 0 ) {
+            reads.addAll(makeReadStack(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX, SAMRecord.NO_ALIGNMENT_START, numUnmappedReads));
+        }
+
+        return reads;
+    }
+
+    private Collection<SAMRecord> makeReadStack( int contig, int alignmentStart, int stackSize ) {
+        Collection<SAMRecord> readStack = new ArrayList<SAMRecord>(stackSize);
+
+        for ( int i = 0; i < stackSize; i++ ) {
+            SAMRecord read = ArtificialSAMUtils.createArtificialRead(header,
+                                                                     "foo",
+                                                                     contig,
+                                                                     alignmentStart,
+                                                                     MathUtils.randomIntegerInRange(minReadLength, maxReadLength));
+            read.setAttribute(READ_GROUP_TAG, readGroupID);
+            readStack.add(read);
+        }
+
+        return readStack;
+    }
+
+    public SAMFileHeader getHeader() {
+        return header;
+    }
+
+    public String getReadGroupID() {
+        return readGroupID;
+    }
+
+    public int getNumContigs() {
+        return numContigs;
+    }
+
+    public int getNumStacksPerContig() {
+        return numStacksPerContig;
+    }
+
+    public int getMinReadsPerStack() {
+        return minReadsPerStack;
+    }
+
+    public int getMaxReadsPerStack() {
+        return maxReadsPerStack;
+    }
+
+    public int getMinDistanceBetweenStacks() {
+        return minDistanceBetweenStacks;
+    }
+
+    public int getMaxDistanceBetweenStacks() {
+        return maxDistanceBetweenStacks;
+    }
+
+    public int getMinReadLength() {
+        return minReadLength;
+    }
+
+    public int getMaxReadLength() {
+        return maxReadLength;
+    }
+
+    public int getNumUnmappedReads() {
+        return numUnmappedReads;
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/ArtificialSingleSampleReadStreamAnalyzer.java
@ -0,0 +1,281 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A class for analyzing and validating the read stream produced by an ArtificialSingleSampleReadStream.
+ *
+ * Collects various statistics about the stream of reads it's fed, and validates the stream
+ * by checking whether the collected statistics match the nominal properties of the stream.
+ *
+ * Subclasses are expected to override the validate() method in order to check whether an artificial
+ * read stream has been *transformed* in some way (eg., by downsampling or some other process), rather
+ * than merely checking whether the stream matches its original properties.
+ *
+ * Usage is simple:
+ *
+ * ArtificialSingleSampleReadStreamAnalyzer analyzer = new ArtificialSingleSampleReadStreamAnalyzer(originalStream);
+ * analyzer.analyze(originalOrTransformedStream);
+ * analyzer.validate();  // override this method if you want to check whether the stream has been transformed
+ *                       // in a certain way relative to the original stream
+ *
+ * @author David Roazen
+ */
+public class ArtificialSingleSampleReadStreamAnalyzer {
+    protected ArtificialSingleSampleReadStream originalStream;
+    protected SAMRecord lastRead;
+    protected int totalReads;
+    protected boolean allSamplesMatch;
+    protected int numContigs;
+    protected List<Integer> stacksPerContig;
+    protected Integer minReadsPerStack;
+    protected Integer maxReadsPerStack;
+    protected Integer minDistanceBetweenStacks;
+    protected Integer maxDistanceBetweenStacks;
+    protected Integer minReadLength;
+    protected Integer maxReadLength;
+    protected int numUnmappedReads;
+
+    protected int currentContigNumStacks;
+    protected int currentStackNumReads;
+
+    /**
+     * Construct a new read stream analyzer, providing an ArtificialSingleSampleReadStream that will
+     * serve as the basis for comparison after the analysis is complete.
+     *
+     * @param originalStream the original ArtificialSingleSampleReadStream upon which the stream
+     *                       that will be fed to the analyzer is based
+     */
+    public ArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream ) {
+        this.originalStream = originalStream;
+        reset();
+    }
+
+    /**
+     * Reset all read stream statistics collected by this analyzer to prepare for a fresh run
+     */
+    public void reset() {
+        lastRead = null;
+        totalReads = 0;
+        allSamplesMatch = true;
+        numContigs = 0;
+        stacksPerContig = new ArrayList<Integer>();
+        minReadsPerStack = null;
+        maxReadsPerStack = null;
+        minDistanceBetweenStacks = null;
+        maxDistanceBetweenStacks = null;
+        minReadLength = null;
+        maxReadLength = null;
+        numUnmappedReads = 0;
+        currentContigNumStacks = 0;
+        currentStackNumReads = 0;
+    }
+
+    /**
+     * Collect statistics on the stream of reads passed in
+     *
+     * @param stream the stream of reads to analyze
+     */
+    public void analyze( Iterable<SAMRecord> stream ) {
+        for ( SAMRecord read : stream ) {
+            update(read);
+        }
+        finalizeStats();
+    }
+
+    /**
+     * Validate the stream by checking whether our collected statistics match the properties of the
+     * original stream. Throws a ReviewedStingException if the stream is invalid.
+     *
+     * Override this method if you want to check whether the stream has been transformed in some
+     * way relative to the original stream.
+     */
+    public void validate() {
+        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
+            if ( totalReads != 0 ) {
+                throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
+            }
+            return;  // no further validation needed for the 0-reads case
+        }
+        else if ( totalReads == 0 ) {
+            throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
+        }
+
+        if ( ! allSamplesMatch ) {
+            throw new ReviewedStingException("some reads had the wrong sample");
+        }
+
+        if ( numContigs != originalStream.getNumContigs() ) {
+            throw new ReviewedStingException("number of contigs not correct");
+        }
+
+        if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
+            throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
+                                                           stacksPerContig.size(), originalStream.getNumContigs()));
+        }
+
+        for ( int contigStackCount : stacksPerContig ) {
+            if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
+                throw new ReviewedStingException("contig had incorrect number of stacks");
+            }
+        }
+
+        if ( originalStream.getNumStacksPerContig() > 0 ) {
+            if ( minReadsPerStack < originalStream.getMinReadsPerStack() ) {
+                throw new ReviewedStingException("stack had fewer than the minimum number of reads");
+            }
+            if ( maxReadsPerStack > originalStream.getMaxReadsPerStack() ) {
+                throw new ReviewedStingException("stack had more than the maximum number of reads");
+            }
+        }
+        else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
+            throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
+        }
+
+        if ( originalStream.getNumStacksPerContig() > 1 ) {
+            if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
+                throw new ReviewedStingException("stacks were separated by less than the minimum distance");
+            }
+            if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
+                throw new ReviewedStingException("stacks were separated by more than the maximum distance");
+            }
+        }
+        else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
+            throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
+        }
+
+        if ( minReadLength < originalStream.getMinReadLength() ) {
+            throw new ReviewedStingException("read was shorter than the minimum allowed length");
+        }
+        if ( maxReadLength > originalStream.getMaxReadLength() ) {
+            throw new ReviewedStingException("read was longer than the maximum allowed length");
+        }
+
+        if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
+            throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
+                                                           originalStream.getNumUnmappedReads(), numUnmappedReads));
+        }
+
+        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
+             numUnmappedReads != totalReads ) {
+            throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
+        }
+    }
+
+    public void update( SAMRecord read ) {
+        if ( read.getReadUnmappedFlag() ) {
+            numUnmappedReads++;
+
+            if ( numUnmappedReads == 1 && lastRead != null ) {
+                processContigChange();
+                numContigs--;
+            }
+        }
+        else if ( lastRead == null ) {
+            numContigs = 1;
+            currentContigNumStacks = 1;
+            currentStackNumReads = 1;
+        }
+        else if ( ! read.getReferenceIndex().equals(lastRead.getReferenceIndex()) ) {
+            processContigChange();
+        }
+        else if ( read.getAlignmentStart() != lastRead.getAlignmentStart() ) {
+            processStackChangeWithinContig(read);
+        }
+        else {
+            currentStackNumReads++;
+        }
+
+        updateReadLength(read.getReadLength());
+        allSamplesMatch = allSamplesMatch && readHasCorrectSample(read);
+        totalReads++;
+
+        lastRead = read;
+    }
+
+
+    private void processContigChange() {
+        numContigs++;
+
+        stacksPerContig.add(currentContigNumStacks);
+        currentContigNumStacks = 1;
+
+        updateReadsPerStack(currentStackNumReads);
+        currentStackNumReads = 1;
+    }
+
+    private void processStackChangeWithinContig( SAMRecord read ) {
+        currentContigNumStacks++;
+
+        updateReadsPerStack(currentStackNumReads);
+        currentStackNumReads = 1;
+
+        updateDistanceBetweenStacks(read.getAlignmentStart() - lastRead.getAlignmentStart());
+    }
+
+    private void updateReadsPerStack( int stackReadCount ) {
+        if ( minReadsPerStack == null || stackReadCount < minReadsPerStack ) {
+            minReadsPerStack = stackReadCount;
+        }
+        if ( maxReadsPerStack == null || stackReadCount > maxReadsPerStack ) {
+            maxReadsPerStack = stackReadCount;
+        }
+    }
+
+    private void updateDistanceBetweenStacks( int stackDistance ) {
+        if ( minDistanceBetweenStacks == null || stackDistance < minDistanceBetweenStacks ) {
+            minDistanceBetweenStacks = stackDistance;
+        }
+        if ( maxDistanceBetweenStacks == null || stackDistance > maxDistanceBetweenStacks ) {
+            maxDistanceBetweenStacks = stackDistance;
+        }
+    }
+
+    private void updateReadLength( int readLength ) {
+        if ( minReadLength == null || readLength < minReadLength ) {
+            minReadLength = readLength;
+        }
+        if ( maxReadLength == null || readLength > maxReadLength ) {
+            maxReadLength = readLength;
+        }
+    }
+
+    private boolean readHasCorrectSample( SAMRecord read ) {
+        return originalStream.getReadGroupID().equals(read.getAttribute("RG"));
+    }
+
+    public void finalizeStats() {
+        if ( lastRead != null && ! lastRead.getReadUnmappedFlag() ) {
+            stacksPerContig.add(currentContigNumStacks);
+            updateReadsPerStack(currentStackNumReads);
+        }
+    }
+}
--- a/public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java
+++ b/public/java/src/org/broadinstitute/sting/utils/threading/NamedThreadFactory.java
@ -0,0 +1,26 @@
+package org.broadinstitute.sting.utils.threading;
+
+import java.util.concurrent.ThreadFactory;
+
+/**
+ * Thread factor that produces threads with a given name pattern
+ *
+ * User: depristo
+ * Date: 9/5/12
+ * Time: 9:22 PM
+ *
+ */
+public class NamedThreadFactory implements ThreadFactory {
+    static int id = 0;
+    final String format;
+
+    public NamedThreadFactory(String format) {
+        this.format = format;
+        String.format(format, id); // test the name
+    }
+
+    @Override
+    public Thread newThread(Runnable r) {
+        return new Thread(r, String.format(format, id++));
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/WalkerTest.java
+++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java
@ -40,13 +40,13 @@ import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
-
-import java.io.*;
-
 import org.testng.Assert;
 import org.testng.annotations.AfterSuite;
 import org.testng.annotations.BeforeMethod;

+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
 import java.text.SimpleDateFormat;
 import java.util.*;

@ -251,20 +251,43 @@ public class WalkerTest extends BaseTest {
        return false;
    }

-    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
-        return executeTest(name, spec, Arrays.asList(1, 4));
+    public enum ParallelTestType {
+        TREE_REDUCIBLE,
+        NANO_SCHEDULED,
+        BOTH
    }

-    protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> parallelThreads) {
+    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec, ParallelTestType testType) {
+        final List<Integer> ntThreads  = testType == ParallelTestType.TREE_REDUCIBLE || testType == ParallelTestType.BOTH ? Arrays.asList(1, 4) : Collections.<Integer>emptyList();
+        final List<Integer> cntThreads = testType == ParallelTestType.NANO_SCHEDULED || testType == ParallelTestType.BOTH ? Arrays.asList(1, 4) : Collections.<Integer>emptyList();
+
+        return executeTest(name, spec, ntThreads, cntThreads);
+    }
+
+    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
+        return executeTestParallel(name, spec, ParallelTestType.TREE_REDUCIBLE);
+    }
+
+    protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> ntThreads, List<Integer> cpuThreads) {
        String originalArgs = spec.args;
        Pair<List<File>, List<String>> results = null;

-        for ( int nt : parallelThreads ) {
+        boolean ran1 = false;
+        for ( int nt : ntThreads ) {
            String extra = nt == 1 ? "" : (" -nt " + nt);
+            ran1 = ran1 || nt == 1;
            spec.args = originalArgs + extra;
            results = executeTest(name + "-nt-" + nt, spec);
        }

+        for ( int nct : cpuThreads ) {
+            if ( nct != 1 ) {
+                String extra = " -nct " + nct;
+                spec.args = originalArgs + extra;
+                results = executeTest(name + "-cnt-" + nct, spec);
+            }
+        }
+
        return results;
    }

--- a/public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/commandline/InvalidArgumentIntegrationTest.java
@ -0,0 +1,41 @@
+package org.broadinstitute.sting.commandline;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.WalkerTest;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.DataProvider;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: chartl
+ * Date: 8/31/12
+ * Time: 11:03 AM
+ * To change this template use File | Settings | File Templates.
+ */
+public class InvalidArgumentIntegrationTest extends WalkerTest {
+    private static final String callsB36  = BaseTest.validationDataLocation + "lowpass.N3.chr1.raw.vcf";
+
+    private WalkerTest.WalkerTestSpec baseTest(String flag, String arg, Class exeption) {
+        return new WalkerTest.WalkerTestSpec("-T VariantsToTable -M 10 --variant:vcf "
+                + callsB36 + " -F POS,CHROM -R "
+                + b36KGReference +  " -o %s " + flag + " " + arg,
+                1, exeption);
+
+    }
+
+    @Test
+    public void testUnknownReadFilter() {
+        executeTest("UnknownReadFilter",baseTest("-rf","TestUnknownReadFilter", UserException.MalformedReadFilterException.class));
+    }
+
+    @Test
+    public void testMalformedWalkerArgs() {
+        executeTest("MalformedWalkerArgs",
+                new WalkerTest.WalkerTestSpec("-T UnknownWalkerName -M 10 --variant:vcf "
+                + callsB36 + " -F POS,CHROM -R "
+                + b36KGReference +  " -o %s ",
+                1, UserException.MalformedWalkerArgumentsException.class));
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
@ -29,7 +29,7 @@ import net.sf.picard.filter.FilteringIterator;
 import net.sf.samtools.SAMFileReader;
 import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.commandline.Tags;
-import org.broadinstitute.sting.gatk.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
 import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
+import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
 import org.broadinstitute.sting.utils.GenomeLocParser;

 import java.util.Collections;
@ -97,7 +98,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
        },
        PER_SAMPLE {
            @Override
-            DownsamplingMethod create() { return GATKArgumentCollection.getDefaultDownsamplingMethod(); }
+            DownsamplingMethod create() { return DownsamplingMethod.getDefaultDownsamplingMethod(new CountLoci(), false); }
        };
        abstract DownsamplingMethod create();
    }
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSourceUnitTest.java
@ -25,36 +25,40 @@
 package org.broadinstitute.sting.gatk.datasources.reads;

 import net.sf.picard.reference.IndexedFastaSequenceFile;
-import net.sf.samtools.SAMFileReader;
-import net.sf.samtools.SAMProgramRecord;
-import net.sf.samtools.SAMRecord;
+import net.sf.samtools.*;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
 import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
 import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
 import org.testng.annotations.AfterMethod;
 import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
+import org.testng.Assert;

 import java.io.File;
 import java.io.FileNotFoundException;
+import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;

 import static org.testng.Assert.*;

 /**
- * @author aaron
- * @version 1.0
- * @date Apr 8, 2009
 * <p/>
 * Class SAMDataSourceUnitTest
 * <p/>
@ -66,6 +70,161 @@ public class SAMDataSourceUnitTest extends BaseTest {
    private IndexedFastaSequenceFile seq;
    private GenomeLocParser genomeLocParser;

+
+    /***********************************
+     * Tests for the fillShard() method
+     ***********************************/
+
+    /**
+     * Tests to ensure that the fillShard() method does not place shard boundaries at inappropriate places,
+     * such as within an alignment start position
+     */
+    private static class SAMDataSourceFillShardBoundaryTest extends TestDataProvider {
+        private int numContigs;
+        private int numStacksPerContig;
+        private int stackSize;
+        private int numUnmappedReads;
+        private DownsamplingMethod downsamplingMethod;
+
+        private SAMFileHeader header;
+
+        public SAMDataSourceFillShardBoundaryTest( int numContigs,
+                                                   int numStacksPerContig,
+                                                   int stackSize,
+                                                   int numUnmappedReads,
+                                                   int downsamplingTargetCoverage ) {
+            super(SAMDataSourceFillShardBoundaryTest.class);
+
+            this.numContigs = numContigs;
+            this.numStacksPerContig = numStacksPerContig;
+            this.stackSize = stackSize;
+            this.numUnmappedReads = numUnmappedReads;
+
+            this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, true);
+
+            setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
+                                  getClass().getSimpleName(), numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage));
+        }
+
+        public void run() {
+            SAMDataSource dataSource = new SAMDataSource(Arrays.asList(createTestBAM()),
+                                                         new ThreadAllocation(),
+                                                         null,
+                                                         new GenomeLocParser(header.getSequenceDictionary()),
+                                                         false,
+                                                         SAMFileReader.ValidationStringency.SILENT,
+                                                         null,
+                                                         downsamplingMethod,
+                                                         new ValidationExclusion(),
+                                                         new ArrayList<ReadFilter>(),
+                                                         false);
+
+            Assert.assertTrue(dataSource.usingExpandedShards());
+
+            Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
+
+            SAMRecord readAtEndOfLastShard = null;
+
+            for ( Shard shard : shardIterator ) {
+                int numContigsThisShard = 0;
+                SAMRecord lastRead = null;
+
+                for ( SAMRecord read : shard.iterator() ) {
+                    if ( lastRead == null ) {
+                        numContigsThisShard = 1;
+                    }
+                    else if ( ! read.getReadUnmappedFlag() && ! lastRead.getReferenceIndex().equals(read.getReferenceIndex()) ) {
+                        numContigsThisShard++;
+                    }
+
+                    // If the last read from the previous shard is not unmapped, we have to make sure
+                    // that no reads in this shard start at the same position
+                    if ( readAtEndOfLastShard != null && ! readAtEndOfLastShard.getReadUnmappedFlag() ) {
+                        Assert.assertFalse(readAtEndOfLastShard.getReferenceIndex().equals(read.getReferenceIndex()) &&
+                                           readAtEndOfLastShard.getAlignmentStart() == read.getAlignmentStart(),
+                                           String.format("Reads from alignment start position %d:%d are split across multiple shards",
+                                                         read.getReferenceIndex(), read.getAlignmentStart()));
+                    }
+
+                    lastRead = read;
+                }
+
+                // There should never be reads from more than 1 contig in a shard (ignoring unmapped reads)
+                Assert.assertTrue(numContigsThisShard == 1, "found a shard with reads from multiple contigs");
+
+                readAtEndOfLastShard = lastRead;
+            }
+        }
+
+        private SAMReaderID createTestBAM() {
+            header = ArtificialSAMUtils.createArtificialSamHeader(numContigs, 1, 100000);
+            SAMReadGroupRecord readGroup = new SAMReadGroupRecord("foo");
+            readGroup.setSample("testSample");
+            header.addReadGroup(readGroup);
+            ArtificialSingleSampleReadStream artificialReads = new ArtificialSingleSampleReadStream(header,
+                                                                                                    "foo",
+                                                                                                    numContigs,
+                                                                                                    numStacksPerContig,
+                                                                                                    stackSize,
+                                                                                                    stackSize,
+                                                                                                    1,
+                                                                                                    100,
+                                                                                                    50,
+                                                                                                    150,
+                                                                                                    numUnmappedReads);
+
+            File testBAMFile;
+            try {
+                testBAMFile = File.createTempFile("SAMDataSourceFillShardBoundaryTest", ".bam");
+                testBAMFile.deleteOnExit();
+            }
+            catch ( IOException e ) {
+                throw new ReviewedStingException(String.format("Failed to create temp bam file for test %s. %s", this, e.getMessage()));
+            }
+
+            SAMFileWriter bamWriter = new SAMFileWriterFactory().setCreateIndex(true).makeBAMWriter(header, true, testBAMFile);
+            for ( SAMRecord read : artificialReads ) {
+                bamWriter.addAlignment(read);
+            }
+            bamWriter.close();
+
+            return new SAMReaderID(testBAMFile, new Tags());
+        }
+    }
+
+    @DataProvider(name = "SAMDataSourceFillShardTestDataProvider")
+    public Object[][] createSAMDataSourceFillShardBoundaryTests() {
+        // Take downsampling out of the equation for these tests -- we are only interested in whether the
+        // shard boundaries occur at the right places in the read stream, and removing downsampling as a
+        // factor simplifies that task (note that we still need to provide a specific downsampling method with
+        // experimental downsampling enabled to trigger the shard expansion behavior, for now)
+        int downsamplingTargetCoverage = ReadShard.MAX_READS * 10;
+
+        for ( int numContigs = 1; numContigs <= 3; numContigs++ ) {
+            for ( int numStacksPerContig : Arrays.asList(1, 2, 4) ) {
+                // Use crucial read shard boundary values as the stack sizes
+                for ( int stackSize : Arrays.asList(ReadShard.MAX_READS / 2, ReadShard.MAX_READS / 2 + 10, ReadShard.MAX_READS, ReadShard.MAX_READS - 1, ReadShard.MAX_READS + 1, ReadShard.MAX_READS * 2) ) {
+                    for ( int numUnmappedReads : Arrays.asList(0, ReadShard.MAX_READS / 2, ReadShard.MAX_READS * 2) ) {
+                        new SAMDataSourceFillShardBoundaryTest(numContigs, numStacksPerContig, stackSize, numUnmappedReads, downsamplingTargetCoverage);
+                    }
+                }
+            }
+        }
+
+        return SAMDataSourceFillShardBoundaryTest.getTests(SAMDataSourceFillShardBoundaryTest.class);
+    }
+
+    // TODO: re-enable these tests once the issues with filepointer ordering + the downsamplers are worked out
+    @Test(dataProvider = "SAMDataSourceFillShardTestDataProvider", enabled = false)
+    public void testSAMDataSourceFillShard( SAMDataSourceFillShardBoundaryTest test ) {
+        logger.warn("Running test: " + test);
+
+        test.run();
+    }
+
+
+    // TODO: the legacy tests below should really be replaced with a more comprehensive suite of tests for SAMDataSource
+
    /**
     * This function does the setup of our parser, before each method call.
     * <p/>
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingReadsIteratorUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/DownsamplingReadsIteratorUnitTest.java
@ -1,73 +1,138 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
 package org.broadinstitute.sting.gatk.downsampling;

 import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMReadGroupRecord;
 import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
-import org.broadinstitute.sting.gatk.iterators.StingSAMIteratorAdapter;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import org.testng.Assert;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;

-import java.util.ArrayList;
-import java.util.Collection;
+import java.util.Arrays;

-public class DownsamplingReadsIteratorUnitTest {
+public class DownsamplingReadsIteratorUnitTest extends BaseTest {

-    @Test
-    public void testDownsamplingIteratorWithPositionalDownsampling() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+    private static class DownsamplingReadsIteratorTest extends TestDataProvider {
+        private DownsamplingReadsIterator downsamplingIter;
+        private int targetCoverage;
+        private ArtificialSingleSampleReadStream stream;
+        private ArtificialSingleSampleReadStreamAnalyzer streamAnalyzer;

-        Collection<SAMRecord> reads = new ArrayList<SAMRecord>();
+        public DownsamplingReadsIteratorTest( ArtificialSingleSampleReadStream stream, int targetCoverage ) {
+            super(DownsamplingReadsIteratorTest.class);

-        reads.addAll(createStackOfIdenticalReads(3000, header, "foo", 0, 1, 100));
-        reads.addAll(createStackOfIdenticalReads(3000, header, "foo", 0, 50, 100));
+            this.stream = stream;
+            this.targetCoverage = targetCoverage;

-        StingSAMIterator iter = new DownsamplingReadsIterator(StingSAMIteratorAdapter.adapt(reads.iterator()), new PositionalDownsampler<SAMRecord>(1000));
-
-        Assert.assertTrue(iter.hasNext());
-        SAMRecord previous = iter.next();
-        int count = 1;
-
-        while ( iter.hasNext() ) {
-            SAMRecord current = iter.next();
-            Assert.assertTrue(previous.getAlignmentStart() <= current.getAlignmentStart() || ! previous.getReferenceIndex().equals(current.getReferenceIndex()));
-            count++;
-            previous = current;
+            setName(String.format("%s: targetCoverage=%d numContigs=%d stacksPerContig=%d readsPerStack=%d-%d distanceBetweenStacks=%d-%d readLength=%d-%d unmappedReads=%d",
+                    getClass().getSimpleName(),
+                    targetCoverage,
+                    stream.getNumContigs(),
+                    stream.getNumStacksPerContig(),
+                    stream.getMinReadsPerStack(),
+                    stream.getMaxReadsPerStack(),
+                    stream.getMinDistanceBetweenStacks(),
+                    stream.getMaxDistanceBetweenStacks(),
+                    stream.getMinReadLength(),
+                    stream.getMaxReadLength(),
+                    stream.getNumUnmappedReads()));
        }

-        Assert.assertEquals(count, 1000);
+        public void run() {
+            streamAnalyzer = new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(stream, targetCoverage);
+            downsamplingIter = new DownsamplingReadsIterator(stream.getStingSAMIterator(), new SimplePositionalDownsampler<SAMRecord>(targetCoverage));
+
+            streamAnalyzer.analyze(downsamplingIter);
+
+            // Check whether the observed properties of the downsampled stream are what they should be
+            streamAnalyzer.validate();
+
+            // Allow memory used by this test to be reclaimed
+            stream = null;
+            streamAnalyzer = null;
+            downsamplingIter = null;
+        }
    }

-    @Test
-    public void testDownsamplingIteratorNoEffectiveDownsampling() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+    @DataProvider(name = "DownsamplingReadsIteratorTestDataProvider")
+    public Object[][] createDownsamplingReadsIteratorTests() {
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(5, 1, 10000);
+        String readGroupID = "testReadGroup";
+        SAMReadGroupRecord readGroup = new SAMReadGroupRecord(readGroupID);
+        readGroup.setSample("testSample");
+        header.addReadGroup(readGroup);

-        Collection<SAMRecord> reads = new ArrayList<SAMRecord>();
+        // Values that don't vary across tests
+        int targetCoverage = 10;
+        int minReadLength = 50;
+        int maxReadLength = 100;
+        int minDistanceBetweenStacks = 1;
+        int maxDistanceBetweenStacks = maxReadLength + 1;

-        reads.addAll(createStackOfIdenticalReads(300, header, "foo", 0, 1, 100));
-        reads.addAll(createStackOfIdenticalReads(300, header, "foo", 0, 50, 100));
+        GenomeAnalysisEngine.resetRandomGenerator();

-        StingSAMIterator iter = new DownsamplingReadsIterator(StingSAMIteratorAdapter.adapt(reads.iterator()), new PositionalDownsampler<SAMRecord>(1000));
-
-        Assert.assertTrue(iter.hasNext());
-        SAMRecord previous = iter.next();
-        int count = 1;
-
-        while ( iter.hasNext() ) {
-            SAMRecord current = iter.next();
-            Assert.assertTrue(previous.getAlignmentStart() <= current.getAlignmentStart() || ! previous.getReferenceIndex().equals(current.getReferenceIndex()));
-            count++;
-            previous = current;
+        // brute force testing!
+        for ( int numContigs : Arrays.asList(1, 2, 5) ) {
+            for ( int stacksPerContig : Arrays.asList(1, 2, 10) ) {
+                for ( int minReadsPerStack : Arrays.asList(1, targetCoverage / 2, targetCoverage, targetCoverage - 1, targetCoverage + 1, targetCoverage * 2) ) {
+                    for ( int maxReadsPerStack : Arrays.asList(1, targetCoverage / 2, targetCoverage, targetCoverage - 1, targetCoverage + 1, targetCoverage * 2) ) {
+                        for ( int numUnmappedReads : Arrays.asList(0, 1, targetCoverage, targetCoverage * 2) ) {
+                            // Only interested in sane read stream configurations here
+                            if ( minReadsPerStack <= maxReadsPerStack ) {
+                                new DownsamplingReadsIteratorTest(new ArtificialSingleSampleReadStream(header,
+                                                                                                       readGroupID,
+                                                                                                       numContigs,
+                                                                                                       stacksPerContig,
+                                                                                                       minReadsPerStack,
+                                                                                                       maxReadsPerStack,
+                                                                                                       minDistanceBetweenStacks,
+                                                                                                       maxDistanceBetweenStacks,
+                                                                                                       minReadLength,
+                                                                                                       maxReadLength,
+                                                                                                       numUnmappedReads),
+                                                                  targetCoverage);
+                            }
+                        }
+                    }
+                }
+            }
        }

-        Assert.assertEquals(count, 600);
+        return DownsamplingReadsIteratorTest.getTests(DownsamplingReadsIteratorTest.class);
    }

-    private ArrayList<SAMRecord> createStackOfIdenticalReads( int stackSize, SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
-        ArrayList<SAMRecord> stack = new ArrayList<SAMRecord>(stackSize);
-        for ( int i = 1; i <= stackSize; i++ ) {
-            stack.add(ArtificialSAMUtils.createArtificialRead(header, name, refIndex, alignmentStart, length));
-        }
-        return stack;
+    @Test(dataProvider = "DownsamplingReadsIteratorTestDataProvider")
+    public void runDownsamplingReadsIteratorTest( DownsamplingReadsIteratorTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+        test.run();
    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/FractionalDownsamplerUnitTest.java
@ -1,65 +1,157 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
 package org.broadinstitute.sting.gatk.downsampling;

 import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 import org.testng.Assert;

 import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
 import java.util.List;

-public class FractionalDownsamplerUnitTest {
+public class FractionalDownsamplerUnitTest extends BaseTest {

-    @Test
-    public void test100PercentInclusion() {
-        FractionalDownsampler<SAMRecord> downsampler = new FractionalDownsampler<SAMRecord>(1.0);
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+    private static class FractionalDownsamplerTest extends TestDataProvider {
+        double fraction;
+        int totalReads;
+        int expectedMinNumReadsAfterDownsampling;
+        int expectedMaxNumReadsAfterDownsampling;
+        int expectedMinDiscardedItems;
+        int expectedMaxDiscardedItems;

-        downsampler.submit(createRandomReads(1000, header, "foo", 0, 100000, 500));
-        downsampler.signalEndOfInput();
+        private static final double EXPECTED_ACCURACY = 0.05; // should be accurate to within +/- this percent

-        List<SAMRecord> downsampledReads = downsampler.consumeDownsampledItems();
+        public FractionalDownsamplerTest( double fraction, int totalReads ) {
+            super(FractionalDownsamplerTest.class);

-        Assert.assertTrue(downsampledReads.size() == 1000);
-    }
+            this.fraction = fraction;
+            this.totalReads = totalReads;

-    @Test
-    public void test0PercentInclusion() {
-        FractionalDownsampler<SAMRecord> downsampler = new FractionalDownsampler<SAMRecord>(0.0);
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+            calculateExpectations();

-        downsampler.submit(createRandomReads(1000, header, "foo", 0, 100000, 500));
-        downsampler.signalEndOfInput();
-
-        List<SAMRecord> downsampledReads = downsampler.consumeDownsampledItems();
-
-        Assert.assertTrue(downsampledReads.isEmpty());
-    }
-
-    @Test
-    public void test50PercentInclusion() {
-        FractionalDownsampler<SAMRecord> downsampler = new FractionalDownsampler<SAMRecord>(0.5);
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        downsampler.submit(createRandomReads(5000, header, "foo", 0, 100000, 500));
-        downsampler.signalEndOfInput();
-
-        List<SAMRecord> downsampledReads = downsampler.consumeDownsampledItems();
-
-        Assert.assertTrue(downsampledReads.size() >= 2000 && downsampledReads.size() <= 3000);
-    }
-
-    private List<SAMRecord> createRandomReads( int numReads, SAMFileHeader header, String name, int contigIndex, int maxAlignmentStart, int maxLength ) {
-        List<SAMRecord> reads = new ArrayList<SAMRecord>(numReads);
-
-        for ( int i = 1; i <= numReads; i++ ) {
-            reads.add(ArtificialSAMUtils.createArtificialRead(header, name, contigIndex,
-                                                              GenomeAnalysisEngine.getRandomGenerator().nextInt(maxAlignmentStart) + 1,
-                                                              GenomeAnalysisEngine.getRandomGenerator().nextInt(maxLength) + 1));
+            setName(String.format("%s: fraction=%.2f totalReads=%d expectedMinNumReadsAfterDownsampling=%d expectedMaxNumReadsAfterDownsampling=%d",
+                    getClass().getSimpleName(), fraction, totalReads, expectedMinNumReadsAfterDownsampling, expectedMaxNumReadsAfterDownsampling));
        }

-        return reads;
+        private void calculateExpectations() {
+            // Require an exact match in the 0% and 100% cases
+            if ( fraction == 0.0 ) {
+                expectedMinNumReadsAfterDownsampling = expectedMaxNumReadsAfterDownsampling = 0;
+                expectedMinDiscardedItems = expectedMaxDiscardedItems = totalReads;
+            }
+            else if ( fraction == 1.0 ) {
+                expectedMinNumReadsAfterDownsampling = expectedMaxNumReadsAfterDownsampling = totalReads;
+                expectedMinDiscardedItems = expectedMaxDiscardedItems = 0;
+            }
+            else {
+                expectedMinNumReadsAfterDownsampling = Math.max((int)((fraction - EXPECTED_ACCURACY) * totalReads), 0);
+                expectedMaxNumReadsAfterDownsampling = Math.min((int) ((fraction + EXPECTED_ACCURACY) * totalReads), totalReads);
+                expectedMinDiscardedItems = totalReads - expectedMaxNumReadsAfterDownsampling;
+                expectedMaxDiscardedItems = totalReads - expectedMinNumReadsAfterDownsampling;
+            }
+        }
+
+        public Collection<SAMRecord> createReads() {
+            Collection<SAMRecord> reads = new ArrayList<SAMRecord>(totalReads);
+
+            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+            reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(totalReads, header, "foo", 0, 1, 100));
+
+            return reads;
+        }
+    }
+
+    @DataProvider(name = "FractionalDownsamplerTestDataProvider")
+    public Object[][] createFractionalDownsamplerTestData() {
+        for ( double fraction : Arrays.asList(0.0, 0.25, 0.5, 0.75, 1.0) ) {
+            for ( int totalReads : Arrays.asList(0, 1000, 10000) ) {
+                new FractionalDownsamplerTest(fraction, totalReads);
+            }
+        }
+
+        return FractionalDownsamplerTest.getTests(FractionalDownsamplerTest.class);
+    }
+
+    @Test(dataProvider = "FractionalDownsamplerTestDataProvider")
+    public void runFractionalDownsamplerTest( FractionalDownsamplerTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        ReadsDownsampler<SAMRecord> downsampler = new FractionalDownsampler<SAMRecord>(test.fraction);
+
+        downsampler.submit(test.createReads());
+
+        if ( test.totalReads > 0 ) {
+            if ( test.fraction > FractionalDownsamplerTest.EXPECTED_ACCURACY ) {
+                Assert.assertTrue(downsampler.hasFinalizedItems());
+                Assert.assertTrue(downsampler.peekFinalized() != null);
+            }
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        downsampler.signalEndOfInput();
+
+        if ( test.totalReads > 0 ) {
+            if ( test.fraction > FractionalDownsamplerTest.EXPECTED_ACCURACY ) {
+                Assert.assertTrue(downsampler.hasFinalizedItems());
+                Assert.assertTrue(downsampler.peekFinalized() != null);
+            }
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+
+        Assert.assertTrue(downsampledReads.size() >= test.expectedMinNumReadsAfterDownsampling &&
+                          downsampledReads.size() <= test.expectedMaxNumReadsAfterDownsampling);
+
+        Assert.assertTrue(downsampler.getNumberOfDiscardedItems() >= test.expectedMinDiscardedItems &&
+                          downsampler.getNumberOfDiscardedItems() <= test.expectedMaxDiscardedItems);
+
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.totalReads - downsampledReads.size());
+
+        downsampler.reset();
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
    }
 }
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/LevelingDownsamplerUnitTest.java
@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.testng.annotations.Test;
+import org.testng.annotations.DataProvider;
+import org.testng.Assert;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.List;
+
+public class LevelingDownsamplerUnitTest extends BaseTest {
+
+    private static class LevelingDownsamplerUniformStacksTest extends TestDataProvider {
+        public enum DataStructure { LINKED_LIST, ARRAY_LIST }
+
+        int targetSize;
+        int numStacks;
+        int stackSize;
+        DataStructure dataStructure;
+        int expectedSize;
+
+        public LevelingDownsamplerUniformStacksTest( int targetSize, int numStacks, int stackSize, DataStructure dataStructure ) {
+            super(LevelingDownsamplerUniformStacksTest.class);
+
+            this.targetSize = targetSize;
+            this.numStacks = numStacks;
+            this.stackSize = stackSize;
+            this.dataStructure = dataStructure;
+            expectedSize = calculateExpectedDownsampledStackSize();
+
+            setName(String.format("%s: targetSize=%d numStacks=%d stackSize=%d dataStructure=%s expectedSize=%d",
+                    getClass().getSimpleName(), targetSize, numStacks, stackSize, dataStructure, expectedSize));
+        }
+
+        public Collection<List<Object>> createStacks() {
+            Collection<List<Object>> stacks = new ArrayList<List<Object>>();
+
+            for ( int i = 1; i <= numStacks; i++ ) {
+                List<Object> stack = dataStructure == DataStructure.LINKED_LIST ? new LinkedList<Object>() : new ArrayList<Object>();
+
+                for ( int j = 1; j <= stackSize; j++ ) {
+                    stack.add(new Object());
+                }
+
+                stacks.add(stack);
+            }
+
+            return stacks;
+        }
+
+        private int calculateExpectedDownsampledStackSize() {
+            int numItemsToRemove = numStacks * stackSize - targetSize;
+
+            if ( numStacks == 0 ) {
+                return 0;
+            }
+            else if ( numItemsToRemove <= 0 ) {
+                return stackSize;
+            }
+
+            return Math.max(1, stackSize - (numItemsToRemove / numStacks));
+        }
+    }
+
+    @DataProvider(name = "UniformStacksDataProvider")
+    public Object[][] createUniformStacksTestData() {
+        for ( int targetSize = 1; targetSize <= 10000; targetSize *= 10 ) {
+            for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
+                for ( int stackSize = 1; stackSize <= 1000; stackSize *= 10 ) {
+                    for ( LevelingDownsamplerUniformStacksTest.DataStructure dataStructure : LevelingDownsamplerUniformStacksTest.DataStructure.values() ) {
+                        new LevelingDownsamplerUniformStacksTest(targetSize, numStacks, stackSize, dataStructure);
+                    }
+                }
+            }
+        }
+
+        return LevelingDownsamplerUniformStacksTest.getTests(LevelingDownsamplerUniformStacksTest.class);
+    }
+
+    @Test( dataProvider = "UniformStacksDataProvider" )
+    public void testLevelingDownsamplerWithUniformStacks( LevelingDownsamplerUniformStacksTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        Downsampler<List<Object>> downsampler = new LevelingDownsampler<List<Object>, Object>(test.targetSize);
+
+        downsampler.submit(test.createStacks());
+
+        if ( test.numStacks > 0 ) {
+            Assert.assertFalse(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null);
+            Assert.assertTrue(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() != null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        downsampler.signalEndOfInput();
+
+        if ( test.numStacks > 0 ) {
+            Assert.assertTrue(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() != null);
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        List<List<Object>> downsampledStacks = downsampler.consumeFinalizedItems();
+        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+
+        Assert.assertEquals(downsampledStacks.size(), test.numStacks);
+
+        int totalRemainingItems = 0;
+        for ( List<Object> stack : downsampledStacks ) {
+            Assert.assertTrue(Math.abs(stack.size() - test.expectedSize) <= 1);
+            totalRemainingItems += stack.size();
+        }
+
+        int numItemsReportedDiscarded = downsampler.getNumberOfDiscardedItems();
+        int numItemsActuallyDiscarded = test.numStacks * test.stackSize - totalRemainingItems;
+
+        Assert.assertEquals(numItemsReportedDiscarded, numItemsActuallyDiscarded);
+
+        downsampler.reset();
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
+
+        Assert.assertTrue(totalRemainingItems <= Math.max(test.targetSize, test.numStacks));
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/PerSampleDownsamplingReadsIteratorUnitTest.java
@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.iterators.VerifyingSamIterator;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.ArtificialMultiSampleReadStream;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+public class PerSampleDownsamplingReadsIteratorUnitTest extends BaseTest {
+
+    private static class PerSampleDownsamplingReadsIteratorTest extends TestDataProvider {
+
+        // TODO: tests should distinguish between variance across samples and variance within a sample
+
+        private enum StreamDensity {
+            SPARSE         (MAX_READ_LENGTH,     MAX_READ_LENGTH * 2),
+            DENSE          (1,                   MIN_READ_LENGTH),
+            MIXED          (1,                   MAX_READ_LENGTH * 2),
+            UNIFORM_DENSE  (1,                   1),
+            UNIFORM_SPARSE (MAX_READ_LENGTH * 2, MAX_READ_LENGTH * 2);
+
+            int minDistanceBetweenStacks;
+            int maxDistanceBetweenStacks;
+
+            StreamDensity( int minDistanceBetweenStacks, int maxDistanceBetweenStacks ) {
+                this.minDistanceBetweenStacks = minDistanceBetweenStacks;
+                this.maxDistanceBetweenStacks = maxDistanceBetweenStacks;
+            }
+
+            public String toString() {
+                return String.format("StreamDensity:%d-%d", minDistanceBetweenStacks, maxDistanceBetweenStacks);
+            }
+        }
+
+        private enum StreamStackDepth {
+            NON_UNIFORM_LOW   (1,  5),
+            NON_UNIFORM_HIGH  (15, 20),
+            NON_UNIFORM_MIXED (1,  20),
+            UNIFORM_SINGLE    (1,  1),
+            UNIFORM_LOW       (2,  2),
+            UNIFORM_HIGH      (20, 20),
+            UNIFORM_MEDIUM    (10, 10);   // should set target coverage to this value for testing
+
+            int minReadsPerStack;
+            int maxReadsPerStack;
+
+            StreamStackDepth( int minReadsPerStack, int maxReadsPerStack ) {
+                this.minReadsPerStack = minReadsPerStack;
+                this.maxReadsPerStack = maxReadsPerStack;
+            }
+
+            public boolean isUniform() {
+                return minReadsPerStack == maxReadsPerStack;
+            }
+
+            public String toString() {
+                return String.format("StreamStackDepth:%d-%d", minReadsPerStack, maxReadsPerStack);
+            }
+        }
+
+        private enum StreamStacksPerContig {
+            UNIFORM(20, 20),
+            NON_UNIFORM(1, 30);
+
+            int minStacksPerContig;
+            int maxStacksPerContig;
+
+            StreamStacksPerContig( int minStacksPerContig, int maxStacksPerContig ) {
+                this.minStacksPerContig = minStacksPerContig;
+                this.maxStacksPerContig = maxStacksPerContig;
+            }
+
+            public boolean isUniform() {
+                return minStacksPerContig == maxStacksPerContig;
+            }
+
+            public String toString() {
+                return String.format("StreamStacksPerContig:%d-%d", minStacksPerContig, maxStacksPerContig);
+            }
+        }
+
+        // Not interested in testing multiple ranges for the read lengths, as none of our current
+        // downsamplers are affected by read length
+        private static final int MIN_READ_LENGTH = 50;
+        private static final int MAX_READ_LENGTH = 150;
+
+        private ReadsDownsamplerFactory<SAMRecord> downsamplerFactory;
+        private int targetCoverage;
+        private int numSamples;
+        private int minContigs;
+        private int maxContigs;
+        private StreamDensity streamDensity;
+        private StreamStackDepth streamStackDepth;
+        private StreamStacksPerContig streamStacksPerContig;
+        private double unmappedReadsFraction;
+        private int unmappedReadsCount;
+        private boolean verifySortedness;
+
+        private ArtificialMultiSampleReadStream mergedReadStream;
+        private Map<String, ArtificialSingleSampleReadStream> perSampleArtificialReadStreams;
+        private Map<String, ArtificialSingleSampleReadStreamAnalyzer> perSampleStreamAnalyzers;
+        private SAMFileHeader header;
+
+        public PerSampleDownsamplingReadsIteratorTest( ReadsDownsamplerFactory<SAMRecord> downsamplerFactory,
+                                                       int targetCoverage,
+                                                       int numSamples,
+                                                       int minContigs,
+                                                       int maxContigs,
+                                                       StreamDensity streamDensity,
+                                                       StreamStackDepth streamStackDepth,
+                                                       StreamStacksPerContig streamStacksPerContig,
+                                                       double unmappedReadsFraction,
+                                                       int unmappedReadsCount,
+                                                       boolean verifySortedness ) {
+            super(PerSampleDownsamplingReadsIteratorTest.class);
+
+            this.downsamplerFactory = downsamplerFactory;
+            this.targetCoverage = targetCoverage;
+            this.numSamples = numSamples;
+            this.minContigs = minContigs;
+            this.maxContigs = maxContigs;
+            this.streamDensity = streamDensity;
+            this.streamStackDepth = streamStackDepth;
+            this.streamStacksPerContig = streamStacksPerContig;
+            this.unmappedReadsFraction = unmappedReadsFraction;
+            this.unmappedReadsCount = unmappedReadsCount;
+            this.verifySortedness = verifySortedness;
+
+            header = createHeader();
+            createReadStreams();
+
+            setName(String.format("%s: targetCoverage=%d numSamples=%d minContigs=%d maxContigs=%d %s %s %s unmappedReadsFraction=%.2f unmappedReadsCount=%d verifySortedness=%b",
+                    getClass().getSimpleName(), targetCoverage, numSamples, minContigs, maxContigs, streamDensity, streamStackDepth, streamStacksPerContig, unmappedReadsFraction, unmappedReadsCount, verifySortedness));
+        }
+
+        private SAMFileHeader createHeader() {
+            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(maxContigs, 1, (streamDensity.maxDistanceBetweenStacks + MAX_READ_LENGTH) * streamStacksPerContig.maxStacksPerContig + 100000);
+            List<String> readGroups = new ArrayList<String>(numSamples);
+            List<String> sampleNames = new ArrayList<String>(numSamples);
+
+            for ( int i = 0; i < numSamples; i++ ) {
+                readGroups.add("ReadGroup" + i);
+                sampleNames.add("Sample" + i);
+            }
+
+            return ArtificialSAMUtils.createEnumeratedReadGroups(header, readGroups, sampleNames);
+        }
+
+        private void createReadStreams() {
+            perSampleArtificialReadStreams = new HashMap<String, ArtificialSingleSampleReadStream>(numSamples);
+            perSampleStreamAnalyzers = new HashMap<String, ArtificialSingleSampleReadStreamAnalyzer>(numSamples);
+
+            for (SAMReadGroupRecord readGroup : header.getReadGroups() ) {
+                String readGroupID = readGroup.getReadGroupId();
+                String sampleName = readGroup.getSample();
+
+                int thisSampleNumContigs = MathUtils.randomIntegerInRange(minContigs, maxContigs);
+                int thisSampleStacksPerContig = MathUtils.randomIntegerInRange(streamStacksPerContig.minStacksPerContig, streamStacksPerContig.maxStacksPerContig);
+
+                int thisSampleNumUnmappedReads = GenomeAnalysisEngine.getRandomGenerator().nextDouble() < unmappedReadsFraction ? unmappedReadsCount : 0;
+
+                ArtificialSingleSampleReadStream thisSampleStream = new ArtificialSingleSampleReadStream(header,
+                                                                                                         readGroupID,
+                                                                                                         thisSampleNumContigs,
+                                                                                                         thisSampleStacksPerContig,
+                                                                                                         streamStackDepth.minReadsPerStack,
+                                                                                                         streamStackDepth.maxReadsPerStack,
+                                                                                                         streamDensity.minDistanceBetweenStacks,
+                                                                                                         streamDensity.maxDistanceBetweenStacks,
+                                                                                                         MIN_READ_LENGTH,
+                                                                                                         MAX_READ_LENGTH,
+                                                                                                         thisSampleNumUnmappedReads);
+                perSampleArtificialReadStreams.put(sampleName, thisSampleStream);
+                perSampleStreamAnalyzers.put(sampleName, new PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer(thisSampleStream, targetCoverage));
+            }
+
+            mergedReadStream = new ArtificialMultiSampleReadStream(perSampleArtificialReadStreams.values());
+        }
+
+        public void run() {
+            StingSAMIterator downsamplingIter = new PerSampleDownsamplingReadsIterator(mergedReadStream.getStingSAMIterator(), downsamplerFactory);
+
+            if ( verifySortedness ) {
+                downsamplingIter = new VerifyingSamIterator(downsamplingIter);
+            }
+
+            while ( downsamplingIter.hasNext() ) {
+                SAMRecord read = downsamplingIter.next();
+                String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+
+                ArtificialSingleSampleReadStreamAnalyzer analyzer = perSampleStreamAnalyzers.get(sampleName);
+                if ( analyzer != null ) {
+                    analyzer.update(read);
+                }
+                else {
+                    throw new ReviewedStingException("bug: stream analyzer for sample " + sampleName + " not found");
+                }
+            }
+
+            for ( Map.Entry<String, ArtificialSingleSampleReadStreamAnalyzer> analyzerEntry : perSampleStreamAnalyzers.entrySet() ) {
+                ArtificialSingleSampleReadStreamAnalyzer analyzer = analyzerEntry.getValue();
+                analyzer.finalizeStats();
+
+                // Validate the downsampled read stream for each sample individually
+                analyzer.validate();
+            }
+
+            // Allow memory used by this test to be reclaimed:
+            mergedReadStream = null;
+            perSampleArtificialReadStreams = null;
+            perSampleStreamAnalyzers = null;
+        }
+    }
+
+    @DataProvider(name = "PerSampleDownsamplingReadsIteratorTestDataProvider")
+    public Object[][] createPerSampleDownsamplingReadsIteratorTests() {
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        // Some values don't vary across tests
+        int targetCoverage = PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.UNIFORM_MEDIUM.minReadsPerStack;
+        ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = new SimplePositionalDownsamplerFactory<SAMRecord>(targetCoverage);
+        int maxContigs = 3;
+        boolean verifySortedness = true;
+
+        for ( int numSamples : Arrays.asList(1, 2, 10) ) {
+            for ( int minContigs = 1; minContigs <= maxContigs; minContigs++ ) {
+                for ( PerSampleDownsamplingReadsIteratorTest.StreamDensity streamDensity : PerSampleDownsamplingReadsIteratorTest.StreamDensity.values() ) {
+                    for ( PerSampleDownsamplingReadsIteratorTest.StreamStackDepth streamStackDepth : PerSampleDownsamplingReadsIteratorTest.StreamStackDepth.values() ) {
+                        for (PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig streamStacksPerContig : PerSampleDownsamplingReadsIteratorTest.StreamStacksPerContig.values() ) {
+                            for ( double unmappedReadsFraction : Arrays.asList(0.0, 1.0, 0.5) ) {
+                                for ( int unmappedReadsCount : Arrays.asList(1, 50) ) {
+                                    new PerSampleDownsamplingReadsIteratorTest(downsamplerFactory,
+                                                                               targetCoverage,
+                                                                               numSamples,
+                                                                               minContigs,
+                                                                               maxContigs,
+                                                                               streamDensity,
+                                                                               streamStackDepth,
+                                                                               streamStacksPerContig,
+                                                                               unmappedReadsFraction,
+                                                                               unmappedReadsCount,
+                                                                               verifySortedness);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return PerSampleDownsamplingReadsIteratorTest.getTests(PerSampleDownsamplingReadsIteratorTest.class);
+    }
+
+    @Test(dataProvider = "PerSampleDownsamplingReadsIteratorTestDataProvider")
+    public void runPerSampleDownsamplingReadsIteratorTest( PerSampleDownsamplingReadsIteratorTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+        test.run();
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionalDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionalDownsamplerUnitTest.java
@ -1,357 +0,0 @@
-package org.broadinstitute.sting.gatk.downsampling;
-
-import net.sf.samtools.SAMFileHeader;
-import net.sf.samtools.SAMRecord;
-import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
-import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
-import org.testng.annotations.Test;
-import org.testng.Assert;
-
-import java.util.*;
-
-// TODO: generalize these tests so that all possible arrangements of 1-4 stacks can be tested
-public class PositionalDownsamplerUnitTest extends BaseTest {
-
-    /**
-     * -------
-     * -------
-     *   -------
-     *   -------
-     *     -------
-     *     -------
-     */
-    @Test
-    public void testThreeOverlappingIdenticalStacks() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 1, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 25, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 50, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreeOverlappingIdenticalStacks: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(0) + downsampledStackSizes.get(1) + downsampledStackSizes.get(2) <= 1000);
-    }
-
-    /**
-     * -------
-     * -------
-     *         -------
-     *         -------
-     *                 -------
-     *                 -------
-     */
-    @Test
-    public void testThreeNonOverlappingIdenticalStacks() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 1, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 201, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 301, 100));
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreeNonOverlappingIdenticalStacks: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) == 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) == 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) == 1000);
-    }
-
-    /**
-     * ---
-     * ---
-     *   -------
-     *   -------
-     *     -------
-     *     -------
-     */
-    @Test
-    public void testThreeStacksWithShortStackAtBeginning() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 1, 25));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 20, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 50, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreeStacksWithShortStackAtBeginning: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(0) + downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) + downsampledStackSizes.get(2) <= 1000);
-    }
-
-    /**
-     * -------
-     * -------
-     *   ---
-     *   ---
-     *      -------
-     *      -------
-     */
-    @Test
-    public void testThreeStacksWithShortStackInMiddle() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 1, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 25, 25));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 75, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreeStacksWithShortStackInMiddle: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(0) + downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(0) + downsampledStackSizes.get(2) <= 1000);
-    }
-
-    /**
-     * ------
-     * ------
-     *   -------
-     *   -------
-     *        ---
-     *        ---
-     */
-    @Test
-    public void testThreeStacksWithShortStackAtEnd() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 1, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 50, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(1500, header, "foo", 0, 135, 25));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreeStacksWithShortStackAtEnd: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(0) + downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) + downsampledStackSizes.get(2) <= 1000);
-    }
-
-    /**
-     * -------
-     * ----
-     *      -------
-     *      ----
-     *           -------
-     *           -------
-     */
-    @Test
-    public void testThreePartiallyOverlappingStacks() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfVaryingReads(2000, header, "foo", 0, 1, 100, 50));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfVaryingReads(2000, header, "foo", 0, 75, 100, 50));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(2000, header, "foo", 0, 150, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testThreePartiallyOverlappingStacks: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(1) <= 1000);
-        Assert.assertTrue(downsampledStackSizes.get(2) <= 1000);
-
-        // TODO: need to examine per-base coverage here
-    }
-
-    @Test
-    public void testNoDownsamplingRequired() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-
-        PositionalDownsampler<SAMRecord> downsampler = new PositionalDownsampler<SAMRecord>(1000);
-
-        downsampler.submit(createStackOfIdenticalReads(300, header, "foo", 0, 1, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(300, header, "foo", 0, 25, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.submit(createStackOfIdenticalReads(300, header, "foo", 0, 50, 100));
-        Assert.assertFalse(downsampler.hasDownsampledItems());
-        Assert.assertTrue(downsampler.hasPendingItems());
-
-        downsampler.signalEndOfInput();
-        Assert.assertTrue(downsampler.hasDownsampledItems());
-        Assert.assertFalse(downsampler.hasPendingItems());
-
-        List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampler.consumeDownsampledItems());
-
-        System.out.println("testNoDownsamplingRequired: Downsampled Stack sizes: " + downsampledStackSizes);
-
-        Assert.assertEquals(downsampledStackSizes.size(), 3);
-        Assert.assertTrue(downsampledStackSizes.get(0) == 300);
-        Assert.assertTrue(downsampledStackSizes.get(1) == 300);
-        Assert.assertTrue(downsampledStackSizes.get(2) == 300);
-    }
-
-    @Test
-    public void testGATKSAMRecordSupport() {
-        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
-        PositionalDownsampler<GATKSAMRecord> downsampler = new PositionalDownsampler<GATKSAMRecord>(1000);
-
-        List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
-        for ( int i = 0; i < 10; i++ ) {
-            reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, 20 * i + 10));
-        }
-
-        downsampler.submit(reads);
-        downsampler.signalEndOfInput();
-        List<GATKSAMRecord> downsampledReads = downsampler.consumeDownsampledItems();
-
-        Assert.assertTrue(downsampledReads.size() == 10);
-    }
-
-    private ArrayList<SAMRecord> createStackOfIdenticalReads( int stackSize, SAMFileHeader header, String name, int refIndex, int alignmentStart, int length ) {
-        ArrayList<SAMRecord> stack = new ArrayList<SAMRecord>(stackSize);
-        for ( int i = 1; i <= stackSize; i++ ) {
-            stack.add(ArtificialSAMUtils.createArtificialRead(header, name, refIndex, alignmentStart, length));
-        }
-        return stack;
-    }
-
-    private ArrayList<SAMRecord> createStackOfVaryingReads( int stackSize, SAMFileHeader header, String name, int refIndex, int alignmentStart, int firstLength, int secondLength ) {
-        ArrayList<SAMRecord> stack = createStackOfIdenticalReads(stackSize / 2, header, name, refIndex, alignmentStart, firstLength);
-        stack.addAll(createStackOfIdenticalReads(stackSize / 2, header, name, refIndex, alignmentStart, secondLength));
-        return stack;
-    }
-
-    private List<Integer> getDownsampledStackSizesAndVerifySortedness( List<SAMRecord> downsampledReads ) {
-        List<Integer> stackSizes = new ArrayList<Integer>();
-        Iterator<SAMRecord> iter = downsampledReads.iterator();
-        Assert.assertTrue(iter.hasNext());
-
-        SAMRecord previousRead = iter.next();
-        int currentStackSize = 1;
-
-        while ( iter.hasNext() ) {
-            SAMRecord currentRead = iter.next();
-
-            if ( ! currentRead.getReferenceIndex().equals(previousRead.getReferenceIndex()) || currentRead.getAlignmentStart() > previousRead.getAlignmentStart() ) {
-                stackSizes.add(currentStackSize);
-                currentStackSize = 1;
-            }
-            else if ( currentRead.getAlignmentStart() < previousRead.getAlignmentStart() ) {
-                Assert.fail(String.format("Reads are out of order: %s %s", previousRead, currentRead));
-            }
-            else {
-                currentStackSize++;
-            }
-
-            previousRead = currentRead;
-        }
-
-        stackSizes.add(currentStackSize);
-        return stackSizes;
-    }
-}
-
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer.java
@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStream;
+import org.broadinstitute.sting.utils.sam.ArtificialSingleSampleReadStreamAnalyzer;
+
+/**
+ * Class for analyzing an artificial read stream that has been positionally downsampled, and verifying
+ * that the downsampling was done correctly without changing the stream in unexpected ways.
+ *
+ * @author David Roazen
+ */
+public class PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer extends ArtificialSingleSampleReadStreamAnalyzer {
+    private int targetCoverage;
+
+    public PositionallyDownsampledArtificialSingleSampleReadStreamAnalyzer( ArtificialSingleSampleReadStream originalStream, int targetCoverage ) {
+        super(originalStream);
+        this.targetCoverage = targetCoverage;
+    }
+
+    /**
+     * Overridden validate() method that checks for the effects of positional downsampling in addition to checking
+     * for whether the original properties of the stream not affected by downsampling have been preserved
+     */
+    @Override
+    public void validate() {
+        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) && originalStream.getNumUnmappedReads() == 0 ) {
+            if ( totalReads != 0 ) {
+                throw new ReviewedStingException("got reads from the stream, but the stream was configured to have 0 reads");
+            }
+            return;  // no further validation needed for the 0-reads case
+        }
+        else if ( totalReads == 0 ) {
+            throw new ReviewedStingException("got no reads from the stream, but the stream was configured to have > 0 reads");
+        }
+
+        if ( ! allSamplesMatch ) {
+            throw new ReviewedStingException("some reads had the wrong sample");
+        }
+
+        if ( numContigs != originalStream.getNumContigs() ) {
+            throw new ReviewedStingException("number of contigs not correct");
+        }
+
+        if ( stacksPerContig.size() != originalStream.getNumContigs() ) {
+            throw new ReviewedStingException(String.format("bug in analyzer code: calculated sizes for %d contigs even though there were only %d contigs",
+                                                           stacksPerContig.size(), originalStream.getNumContigs()));
+        }
+
+        for ( int contigStackCount : stacksPerContig ) {
+            if ( contigStackCount != originalStream.getNumStacksPerContig() ) {
+                throw new ReviewedStingException("contig had incorrect number of stacks");
+            }
+        }
+
+        if ( originalStream.getNumStacksPerContig() > 0 ) {
+
+            // Check for the effects of positional downsampling:
+            int stackMinimumAfterDownsampling = Math.min(targetCoverage, originalStream.getMinReadsPerStack());
+            int stackMaximumAfterDownsampling = targetCoverage;
+
+            if ( minReadsPerStack < stackMinimumAfterDownsampling ) {
+                throw new ReviewedStingException("stack had fewer than the minimum number of reads after downsampling");
+            }
+            if ( maxReadsPerStack > stackMaximumAfterDownsampling ) {
+                throw new ReviewedStingException("stack had more than the maximum number of reads after downsampling");
+            }
+        }
+        else if ( minReadsPerStack != null || maxReadsPerStack != null ) {
+            throw new ReviewedStingException("bug in analyzer code: reads per stack was calculated even though 0 stacks per contig was specified");
+        }
+
+        if ( originalStream.getNumStacksPerContig() > 1 ) {
+            if ( minDistanceBetweenStacks < originalStream.getMinDistanceBetweenStacks() ) {
+                throw new ReviewedStingException("stacks were separated by less than the minimum distance");
+            }
+            if ( maxDistanceBetweenStacks > originalStream.getMaxDistanceBetweenStacks() ) {
+                throw new ReviewedStingException("stacks were separated by more than the maximum distance");
+            }
+        }
+        else if ( minDistanceBetweenStacks != null || maxDistanceBetweenStacks != null ) {
+            throw new ReviewedStingException("bug in analyzer code: distance between stacks was calculated even though numStacksPerContig was <= 1");
+        }
+
+        if ( minReadLength < originalStream.getMinReadLength() ) {
+            throw new ReviewedStingException("read was shorter than the minimum allowed length");
+        }
+        if ( maxReadLength > originalStream.getMaxReadLength() ) {
+            throw new ReviewedStingException("read was longer than the maximum allowed length");
+        }
+
+        if ( numUnmappedReads != originalStream.getNumUnmappedReads() ) {
+            throw new ReviewedStingException(String.format("wrong number of unmapped reads: requested %d but saw %d",
+                                                           originalStream.getNumUnmappedReads(), numUnmappedReads));
+        }
+
+        if ( (originalStream.getNumContigs() == 0 || originalStream.getNumStacksPerContig() == 0) &&
+             numUnmappedReads != totalReads ) {
+            throw new ReviewedStingException("stream should have consisted only of unmapped reads, but saw some mapped reads");
+        }
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/ReservoirDownsamplerUnitTest.java
@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import org.testng.Assert;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+public class ReservoirDownsamplerUnitTest extends BaseTest {
+
+    private static class ReservoirDownsamplerTest extends TestDataProvider {
+        int reservoirSize;
+        int totalReads;
+        int expectedNumReadsAfterDownsampling;
+        int expectedNumDiscardedItems;
+
+        public ReservoirDownsamplerTest( int reservoirSize, int totalReads ) {
+            super(ReservoirDownsamplerTest.class);
+
+            this.reservoirSize = reservoirSize;
+            this.totalReads = totalReads;
+
+            expectedNumReadsAfterDownsampling = Math.min(reservoirSize, totalReads);
+            expectedNumDiscardedItems = totalReads <= reservoirSize ? 0 : totalReads - reservoirSize;
+
+            setName(String.format("%s: reservoirSize=%d totalReads=%d expectedNumReadsAfterDownsampling=%d expectedNumDiscardedItems=%d",
+                    getClass().getSimpleName(), reservoirSize, totalReads, expectedNumReadsAfterDownsampling, expectedNumDiscardedItems));
+        }
+
+        public Collection<SAMRecord> createReads() {
+            Collection<SAMRecord> reads = new ArrayList<SAMRecord>(totalReads);
+
+            SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+            reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(totalReads, header, "foo", 0, 1, 100));
+
+            return reads;
+        }
+    }
+
+    @DataProvider(name = "ReservoirDownsamplerTestDataProvider")
+    public Object[][] createReservoirDownsamplerTestData() {
+        for ( int reservoirSize = 1; reservoirSize <= 10000; reservoirSize *= 10 ) {
+            new ReservoirDownsamplerTest(reservoirSize, 0);
+            for ( int totalReads = 1; totalReads <= 10000; totalReads *= 10 ) {
+                new ReservoirDownsamplerTest(reservoirSize, totalReads);
+            }
+        }
+
+        return ReservoirDownsamplerTest.getTests(ReservoirDownsamplerTest.class);
+    }
+
+    @Test(dataProvider = "ReservoirDownsamplerTestDataProvider")
+    public void testReservoirDownsampler( ReservoirDownsamplerTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        ReadsDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(test.reservoirSize);
+
+        downsampler.submit(test.createReads());
+
+        if ( test.totalReads > 0 ) {
+            Assert.assertTrue(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() != null);
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        downsampler.signalEndOfInput();
+
+        if ( test.totalReads > 0 ) {
+            Assert.assertTrue(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() != null);
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+
+        Assert.assertEquals(downsampledReads.size(), test.expectedNumReadsAfterDownsampling);
+
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), test.expectedNumDiscardedItems);
+        Assert.assertEquals(test.totalReads - downsampledReads.size(), test.expectedNumDiscardedItems);
+
+        downsampler.reset();
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/downsampling/SimplePositionalDownsamplerUnitTest.java
@ -0,0 +1,330 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+import org.testng.Assert;
+
+import java.util.*;
+
+public class SimplePositionalDownsamplerUnitTest extends BaseTest {
+
+    private static class SimplePositionalDownsamplerTest extends TestDataProvider {
+        int targetCoverage;
+        int numStacks;
+        List<Integer> stackSizes;
+        List<Integer> expectedStackSizes;
+        boolean multipleContigs;
+        int totalInitialReads;
+
+        public SimplePositionalDownsamplerTest( int targetCoverage, List<Integer> stackSizes, boolean multipleContigs ) {
+            super(SimplePositionalDownsamplerTest.class);
+
+            this.targetCoverage = targetCoverage;
+            this.numStacks = stackSizes.size();
+            this.stackSizes = stackSizes;
+            this.multipleContigs = multipleContigs;
+
+            calculateExpectedDownsampledStackSizes();
+
+            totalInitialReads = 0;
+            for ( Integer stackSize : stackSizes ) {
+                totalInitialReads += stackSize;
+            }
+
+            setName(String.format("%s: targetCoverage=%d numStacks=%d stackSizes=%s expectedSizes=%s multipleContigs=%b",
+                    getClass().getSimpleName(), targetCoverage, numStacks, stackSizes, expectedStackSizes, multipleContigs));
+        }
+
+        public Collection<SAMRecord> createReads() {
+            Collection<SAMRecord> reads = new ArrayList<SAMRecord>();
+            SAMFileHeader header = multipleContigs ?
+                                   ArtificialSAMUtils.createArtificialSamHeader(2, 1, 1000000) :
+                                   ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+
+            int refIndex = 0;
+            int alignmentStart = 1;
+            int readLength = 100;
+
+            for ( int i = 0; i < numStacks; i++ ) {
+                if ( multipleContigs && refIndex == 0 && i >= numStacks / 2 ) {
+                    refIndex++;
+                }
+
+                reads.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(stackSizes.get(i), header, "foo",
+                                                                                      refIndex, alignmentStart, readLength));
+
+                alignmentStart += 10;
+            }
+
+            return reads;
+        }
+
+        private void calculateExpectedDownsampledStackSizes() {
+            expectedStackSizes = new ArrayList<Integer>(numStacks);
+
+            for ( Integer stackSize : stackSizes ) {
+                int expectedSize = targetCoverage >= stackSize ? stackSize : targetCoverage;
+                expectedStackSizes.add(expectedSize);
+            }
+        }
+    }
+
+    @DataProvider(name = "SimplePositionalDownsamplerTestDataProvider")
+    public Object[][] createSimplePositionalDownsamplerTestData() {
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        for ( int targetCoverage = 1; targetCoverage <= 10000; targetCoverage *= 10 ) {
+            for ( int contigs = 1; contigs <= 2; contigs++ ) {
+                for ( int numStacks = 0; numStacks <= 10; numStacks++ ) {
+                    List<Integer> stackSizes = new ArrayList<Integer>(numStacks);
+                    for ( int stack = 1; stack <= numStacks; stack++ ) {
+                        stackSizes.add(GenomeAnalysisEngine.getRandomGenerator().nextInt(targetCoverage * 2) + 1);
+                    }
+                    new SimplePositionalDownsamplerTest(targetCoverage, stackSizes, contigs > 1);
+                }
+            }
+        }
+
+        return SimplePositionalDownsamplerTest.getTests(SimplePositionalDownsamplerTest.class);
+    }
+
+    @Test( dataProvider = "SimplePositionalDownsamplerTestDataProvider" )
+    public void testSimplePostionalDownsampler( SimplePositionalDownsamplerTest test ) {
+        logger.warn("Running test: " + test);
+
+        GenomeAnalysisEngine.resetRandomGenerator();
+
+        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(test.targetCoverage);
+
+        downsampler.submit(test.createReads());
+
+        if ( test.numStacks > 1 ) {
+            Assert.assertTrue(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() != null);
+            Assert.assertTrue(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() != null);
+        }
+        else if ( test.numStacks == 1 ) {
+            Assert.assertFalse(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null);
+            Assert.assertTrue(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() != null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        downsampler.signalEndOfInput();
+
+        if ( test.numStacks > 0 ) {
+            Assert.assertTrue(downsampler.hasFinalizedItems());
+            Assert.assertTrue(downsampler.peekFinalized() != null);
+            Assert.assertFalse(downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekPending() == null);
+        }
+        else {
+            Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+            Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+        }
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+        Assert.assertFalse(downsampler.hasFinalizedItems() || downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekFinalized() == null && downsampler.peekPending() == null);
+
+        if ( test.numStacks == 0 ) {
+            Assert.assertTrue(downsampledReads.isEmpty());
+        }
+        else {
+            List<Integer> downsampledStackSizes = getDownsampledStackSizesAndVerifySortedness(downsampledReads);
+
+            Assert.assertEquals(downsampledStackSizes.size(), test.numStacks);
+            Assert.assertEquals(downsampledStackSizes, test.expectedStackSizes);
+
+            int numReadsActuallyEliminated = test.totalInitialReads - downsampledReads.size();
+            int numReadsReportedEliminated = downsampler.getNumberOfDiscardedItems();
+            Assert.assertEquals(numReadsActuallyEliminated, numReadsReportedEliminated);
+        }
+
+        downsampler.reset();
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 0);
+    }
+
+    private List<Integer> getDownsampledStackSizesAndVerifySortedness( List<SAMRecord> downsampledReads ) {
+        List<Integer> stackSizes = new ArrayList<Integer>();
+
+        if ( downsampledReads.isEmpty() ) {
+            return stackSizes;
+        }
+
+        Iterator<SAMRecord> iter = downsampledReads.iterator();
+        Assert.assertTrue(iter.hasNext());
+
+        SAMRecord previousRead = iter.next();
+        int currentStackSize = 1;
+
+        while ( iter.hasNext() ) {
+            SAMRecord currentRead = iter.next();
+
+            if ( currentRead.getReferenceIndex() > previousRead.getReferenceIndex() || currentRead.getAlignmentStart() > previousRead.getAlignmentStart() ) {
+                stackSizes.add(currentStackSize);
+                currentStackSize = 1;
+            }
+            else if ( currentRead.getReferenceIndex() < previousRead.getReferenceIndex() || currentRead.getAlignmentStart() < previousRead.getAlignmentStart() ) {
+                Assert.fail(String.format("Reads are out of order: %s %s", previousRead, currentRead));
+            }
+            else {
+                currentStackSize++;
+            }
+
+            previousRead = currentRead;
+        }
+
+        stackSizes.add(currentStackSize);
+        return stackSizes;
+    }
+
+    @Test
+    public void testSimplePositionalDownsamplerSignalNoMoreReadsBefore() {
+        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(1000);
+
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+
+        Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
+        readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(50, header, "foo", 0, 1, 100));
+        downsampler.submit(readStack);
+
+        Assert.assertFalse(downsampler.hasFinalizedItems());
+        Assert.assertTrue(downsampler.peekFinalized() == null);
+        Assert.assertTrue(downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekPending() != null);
+
+        SAMRecord laterRead = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 2, 100);
+        downsampler.signalNoMoreReadsBefore(laterRead);
+
+        Assert.assertTrue(downsampler.hasFinalizedItems());
+        Assert.assertTrue(downsampler.peekFinalized() != null);
+        Assert.assertFalse(downsampler.hasPendingItems());
+        Assert.assertTrue(downsampler.peekPending() == null);
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+
+        Assert.assertEquals(downsampledReads.size(), readStack.size());
+    }
+
+    @Test
+    public void testBasicUnmappedReadsSupport() {
+        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);
+
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+
+        Collection<SAMRecord> readStack = new ArrayList<SAMRecord>();
+        readStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
+                                                                                  SAMRecord.NO_ALIGNMENT_START, 100));
+        for ( SAMRecord read : readStack ) {
+            Assert.assertTrue(read.getReadUnmappedFlag());
+        }
+
+        downsampler.submit(readStack);
+        downsampler.signalEndOfInput();
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+
+        // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
+        Assert.assertEquals(downsampledReads.size(), readStack.size());
+
+        for ( SAMRecord read: downsampledReads ) {
+            Assert.assertTrue(read.getReadUnmappedFlag());
+        }
+    }
+
+    @Test
+    public void testMixedMappedAndUnmappedReadsSupport() {
+        ReadsDownsampler<SAMRecord> downsampler = new SimplePositionalDownsampler<SAMRecord>(100);
+
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+
+        Collection<SAMRecord> mappedReadStack = new ArrayList<SAMRecord>();
+        mappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", 0, 1, 100));
+        for ( SAMRecord read : mappedReadStack ) {
+            Assert.assertFalse(read.getReadUnmappedFlag());
+        }
+
+        Collection<SAMRecord> unmappedReadStack = new ArrayList<SAMRecord>();
+        unmappedReadStack.addAll(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(200, header, "foo", SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX,
+                                                                                          SAMRecord.NO_ALIGNMENT_START, 100));
+        for ( SAMRecord read : unmappedReadStack ) {
+            Assert.assertTrue(read.getReadUnmappedFlag());
+        }
+
+        downsampler.submit(mappedReadStack);
+        downsampler.submit(unmappedReadStack);
+        downsampler.signalEndOfInput();
+
+        List<SAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+
+        // Unmapped reads should not get downsampled at all by the SimplePositionalDownsampler
+        Assert.assertEquals(downsampledReads.size(), 300);
+        Assert.assertEquals(downsampler.getNumberOfDiscardedItems(), 100);
+
+        int count = 1;
+        for ( SAMRecord read: downsampledReads ) {
+            if ( count <= 100 ) {
+                Assert.assertFalse(read.getReadUnmappedFlag());
+            }
+            else {
+                Assert.assertTrue(read.getReadUnmappedFlag());
+            }
+
+            count++;
+        }
+    }
+
+    @Test
+    public void testGATKSAMRecordSupport() {
+        ReadsDownsampler<GATKSAMRecord> downsampler = new SimplePositionalDownsampler<GATKSAMRecord>(1000);
+
+        SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000000);
+
+        List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>();
+        for ( int i = 0; i < 10; i++ ) {
+            reads.add(ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, 20 * i + 10));
+        }
+
+        downsampler.submit(reads);
+        downsampler.signalEndOfInput();
+        List<GATKSAMRecord> downsampledReads = downsampler.consumeFinalizedItems();
+
+        Assert.assertEquals(downsampledReads.size(), 10);
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java
@ -0,0 +1,546 @@
+package org.broadinstitute.sting.gatk.iterators;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileReader;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.util.CloseableIterator;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.ReadProperties;
+import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
+import org.broadinstitute.sting.gatk.filters.ReadFilter;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.pileup.PileupElement;
+import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.util.*;
+
+/**
+ * testing of the experimental version of LocusIteratorByState
+ */
+public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
+    private static SAMFileHeader header;
+    private LocusIteratorByStateExperimental li;
+    private GenomeLocParser genomeLocParser;
+
+    @BeforeClass
+    public void beforeClass() {
+        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
+        genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
+    }
+
+    private final LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
+        return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
+    }
+
+    private static ReadProperties createTestReadProperties() {
+        return createTestReadProperties(null);
+    }
+
+    private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
+        return new ReadProperties(
+                Collections.<SAMReaderID>emptyList(),
+                new SAMFileHeader(),
+                false,
+                SAMFileReader.ValidationStringency.STRICT,
+                downsamplingMethod,
+                new ValidationExclusion(),
+                Collections.<ReadFilter>emptyList(),
+                Collections.<ReadTransformer>emptyList(),
+                false,
+                (byte) -1
+        );
+    }
+
+    private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
+        Iterator<T> iterator;
+
+        public FakeCloseableIterator(Iterator<T> it) {
+            iterator = it;
+        }
+
+        @Override
+        public void close() {
+            return;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return iterator.hasNext();
+        }
+
+        @Override
+        public T next() {
+            return iterator.next();
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("Don't remove!");
+        }
+    }
+
+    @Test
+    public void testXandEQOperators() {
+        final byte[] bases1 = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
+        final byte[] bases2 = new byte[] {'A','A','A','C','A','A','A','A','A','C'};
+
+        // create a test version of the Reads object
+        ReadProperties readAttributes = createTestReadProperties();
+
+        SAMRecord r1 = ArtificialSAMUtils.createArtificialRead(header,"r1",0,1,10);
+        r1.setReadBases(bases1);
+        r1.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r1.setCigarString("10M");
+
+        SAMRecord r2 = ArtificialSAMUtils.createArtificialRead(header,"r2",0,1,10);
+        r2.setReadBases(bases2);
+        r2.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
+        r2.setCigarString("3=1X5=1X");
+
+        SAMRecord r3 = ArtificialSAMUtils.createArtificialRead(header,"r3",0,1,10);
+        r3.setReadBases(bases2);
+        r3.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
+        r3.setCigarString("3=1X5M1X");
+
+        SAMRecord r4  = ArtificialSAMUtils.createArtificialRead(header,"r4",0,1,10);
+        r4.setReadBases(bases2);
+        r4.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        r4.setCigarString("10M");
+
+        List<SAMRecord> reads = Arrays.asList(r1, r2, r3, r4);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads,readAttributes);
+
+        while (li.hasNext()) {
+            AlignmentContext context = li.next();
+            ReadBackedPileup pileup = context.getBasePileup();
+            Assert.assertEquals(pileup.depthOfCoverage(), 4);
+        }
+    }
+
+    @Test
+    public void testIndelsInRegularPileup() {
+        final byte[] bases = new byte[] {'A','A','A','A','A','A','A','A','A','A'};
+        final byte[] indelBases = new byte[] {'A','A','A','A','C','T','A','A','A','A','A','A'};
+
+        // create a test version of the Reads object
+        ReadProperties readAttributes = createTestReadProperties();
+
+        SAMRecord before = ArtificialSAMUtils.createArtificialRead(header,"before",0,1,10);
+        before.setReadBases(bases);
+        before.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        before.setCigarString("10M");
+
+        SAMRecord during = ArtificialSAMUtils.createArtificialRead(header,"during",0,2,10);
+        during.setReadBases(indelBases);
+        during.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20,20,20});
+        during.setCigarString("4M2I6M");
+
+        SAMRecord after  = ArtificialSAMUtils.createArtificialRead(header,"after",0,3,10);
+        after.setReadBases(bases);
+        after.setBaseQualities(new byte[] {20,20,20,20,20,20,20,20,20,20});
+        after.setCigarString("10M");
+
+        List<SAMRecord> reads = Arrays.asList(before, during, after);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads,readAttributes);
+
+        boolean foundIndel = false;
+        while (li.hasNext()) {
+            AlignmentContext context = li.next();
+            ReadBackedPileup pileup = context.getBasePileup().getBaseFilteredPileup(10);
+            for (PileupElement p : pileup) {
+                if (p.isBeforeInsertion()) {
+                    foundIndel = true;
+                    Assert.assertEquals(p.getEventLength(), 2, "Wrong event length");
+                    Assert.assertEquals(p.getEventBases(), "CT", "Inserted bases are incorrect");
+                    break;
+               }
+            }
+
+         }
+
+         Assert.assertTrue(foundIndel,"Indel in pileup not found");
+    }
+
+    @Test
+    public void testWholeIndelReadInIsolation() {
+        final int firstLocus = 44367789;
+
+        // create a test version of the Reads object
+        ReadProperties readAttributes = createTestReadProperties();
+
+        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header, "indelOnly", 0, firstLocus, 76);
+        indelOnlyRead.setReadBases(Utils.dupBytes((byte)'A',76));
+        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte) '@', 76));
+        indelOnlyRead.setCigarString("76I");
+
+        List<SAMRecord> reads = Arrays.asList(indelOnlyRead);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads, readAttributes);
+
+        // Traditionally, reads that end with indels bleed into the pileup at the following locus.  Verify that the next pileup contains this read
+        // and considers it to be an indel-containing read.
+        Assert.assertTrue(li.hasNext(),"Should have found a whole-indel read in the normal base pileup without extended events enabled");
+        AlignmentContext alignmentContext = li.next();
+        Assert.assertEquals(alignmentContext.getLocation().getStart(), firstLocus, "Base pileup is at incorrect location.");
+        ReadBackedPileup basePileup = alignmentContext.getBasePileup();
+        Assert.assertEquals(basePileup.getReads().size(),1,"Pileup is of incorrect size");
+        Assert.assertSame(basePileup.getReads().get(0), indelOnlyRead, "Read in pileup is incorrect");
+    }
+
+    /**
+     * Test to make sure that reads supporting only an indel (example cigar string: 76I) do
+     * not negatively influence the ordering of the pileup.
+     */
+    @Test
+    public void testWholeIndelRead() {
+        final int firstLocus = 44367788, secondLocus = firstLocus + 1;
+
+        SAMRecord leadingRead = ArtificialSAMUtils.createArtificialRead(header,"leading",0,firstLocus,76);
+        leadingRead.setReadBases(Utils.dupBytes((byte)'A',76));
+        leadingRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
+        leadingRead.setCigarString("1M75I");
+
+        SAMRecord indelOnlyRead = ArtificialSAMUtils.createArtificialRead(header,"indelOnly",0,secondLocus,76);
+        indelOnlyRead.setReadBases(Utils.dupBytes((byte) 'A', 76));
+        indelOnlyRead.setBaseQualities(Utils.dupBytes((byte)'@',76));
+        indelOnlyRead.setCigarString("76I");
+
+        SAMRecord fullMatchAfterIndel = ArtificialSAMUtils.createArtificialRead(header,"fullMatch",0,secondLocus,76);
+        fullMatchAfterIndel.setReadBases(Utils.dupBytes((byte)'A',76));
+        fullMatchAfterIndel.setBaseQualities(Utils.dupBytes((byte)'@',76));
+        fullMatchAfterIndel.setCigarString("75I1M");
+
+        List<SAMRecord> reads = Arrays.asList(leadingRead, indelOnlyRead, fullMatchAfterIndel);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads, createTestReadProperties());
+        int currentLocus = firstLocus;
+        int numAlignmentContextsFound = 0;
+
+        while(li.hasNext()) {
+            AlignmentContext alignmentContext = li.next();
+            Assert.assertEquals(alignmentContext.getLocation().getStart(),currentLocus,"Current locus returned by alignment context is incorrect");
+
+            if(currentLocus == firstLocus) {
+                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
+                Assert.assertEquals(readsAtLocus.size(),1,"Wrong number of reads at locus " + currentLocus);
+                Assert.assertSame(readsAtLocus.get(0),leadingRead,"leadingRead absent from pileup at locus " + currentLocus);
+            }
+            else if(currentLocus == secondLocus) {
+                List<GATKSAMRecord> readsAtLocus = alignmentContext.getBasePileup().getReads();
+                Assert.assertEquals(readsAtLocus.size(),2,"Wrong number of reads at locus " + currentLocus);
+                Assert.assertSame(readsAtLocus.get(0),indelOnlyRead,"indelOnlyRead absent from pileup at locus " + currentLocus);
+                Assert.assertSame(readsAtLocus.get(1),fullMatchAfterIndel,"fullMatchAfterIndel absent from pileup at locus " + currentLocus);
+            }
+
+            currentLocus++;
+            numAlignmentContextsFound++;
+        }
+
+        Assert.assertEquals(numAlignmentContextsFound, 2, "Found incorrect number of alignment contexts");
+    }
+
+    /**
+     * Test to make sure that reads supporting only an indel (example cigar string: 76I) are represented properly
+     */
+    @Test
+    public void testWholeIndelReadRepresentedTest() {
+        final int firstLocus = 44367788, secondLocus = firstLocus + 1;
+
+        SAMRecord read1 = ArtificialSAMUtils.createArtificialRead(header,"read1",0,secondLocus,1);
+        read1.setReadBases(Utils.dupBytes((byte) 'A', 1));
+        read1.setBaseQualities(Utils.dupBytes((byte) '@', 1));
+        read1.setCigarString("1I");
+
+        List<SAMRecord> reads = Arrays.asList(read1);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads, createTestReadProperties());
+
+        while(li.hasNext()) {
+            AlignmentContext alignmentContext = li.next();
+            ReadBackedPileup p = alignmentContext.getBasePileup();
+            Assert.assertTrue(p.getNumberOfElements() == 1);
+            PileupElement pe = p.iterator().next();
+            Assert.assertTrue(pe.isBeforeInsertion());
+            Assert.assertFalse(pe.isAfterInsertion());
+            Assert.assertEquals(pe.getEventBases(), "A");
+        }
+
+        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(header,"read2",0,secondLocus,10);
+        read2.setReadBases(Utils.dupBytes((byte) 'A', 10));
+        read2.setBaseQualities(Utils.dupBytes((byte) '@', 10));
+        read2.setCigarString("10I");
+
+        reads = Arrays.asList(read2);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(reads, createTestReadProperties());
+
+        while(li.hasNext()) {
+            AlignmentContext alignmentContext = li.next();
+            ReadBackedPileup p = alignmentContext.getBasePileup();
+            Assert.assertTrue(p.getNumberOfElements() == 1);
+            PileupElement pe = p.iterator().next();
+            Assert.assertTrue(pe.isBeforeInsertion());
+            Assert.assertFalse(pe.isAfterInsertion());
+            Assert.assertEquals(pe.getEventBases(), "AAAAAAAAAA");
+        }
+    }
+
+    ////////////////////////////////////////////
+    // comprehensive LIBS/PileupElement tests //
+    ////////////////////////////////////////////
+
+    private static final int IS_BEFORE_DELETED_BASE_FLAG = 1;
+    private static final int IS_BEFORE_DELETION_START_FLAG = 2;
+    private static final int IS_AFTER_DELETED_BASE_FLAG = 4;
+    private static final int IS_AFTER_DELETION_END_FLAG = 8;
+    private static final int IS_BEFORE_INSERTION_FLAG = 16;
+    private static final int IS_AFTER_INSERTION_FLAG = 32;
+    private static final int IS_NEXT_TO_SOFTCLIP_FLAG = 64;
+
+    private static class LIBSTest {
+
+
+        final String cigar;
+        final int readLength;
+        final List<Integer> offsets;
+        final List<Integer> flags;
+
+        private LIBSTest(final String cigar, final int readLength, final List<Integer> offsets, final List<Integer> flags) {
+            this.cigar = cigar;
+            this.readLength = readLength;
+            this.offsets = offsets;
+            this.flags = flags;
+        }
+    }
+
+    @DataProvider(name = "LIBSTest")
+    public Object[][] createLIBSTestData() {
+        return new Object[][]{
+                {new LIBSTest("1I", 1, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
+                {new LIBSTest("10I", 10, Arrays.asList(0), Arrays.asList(IS_BEFORE_INSERTION_FLAG))},
+                {new LIBSTest("2M2I2M", 6, Arrays.asList(0,1,4,5), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG,IS_AFTER_INSERTION_FLAG,0))},
+                {new LIBSTest("2M2I", 4, Arrays.asList(0,1), Arrays.asList(0,IS_BEFORE_INSERTION_FLAG))},
+                //TODO -- uncomment these when LIBS is fixed
+                //{new LIBSTest("2I2M", 4, Arrays.asList(2,3), Arrays.asList(IS_AFTER_INSERTION_FLAG,0))},
+                //{new LIBSTest("1I1M1D1M", 3, Arrays.asList(0,1), Arrays.asList(IS_AFTER_INSERTION_FLAG | IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG))},
+                //{new LIBSTest("1S1I1M", 3, Arrays.asList(2), Arrays.asList(IS_AFTER_INSERTION_FLAG))},
+                {new LIBSTest("1M2D2M", 3, Arrays.asList(0,1,2), Arrays.asList(IS_BEFORE_DELETION_START_FLAG | IS_BEFORE_DELETED_BASE_FLAG,IS_AFTER_DELETED_BASE_FLAG | IS_AFTER_DELETION_END_FLAG,0))},
+                {new LIBSTest("1S1M", 2, Arrays.asList(1), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
+                {new LIBSTest("1M1S", 2, Arrays.asList(0), Arrays.asList(IS_NEXT_TO_SOFTCLIP_FLAG))},
+                {new LIBSTest("1S1M1I", 3, Arrays.asList(1), Arrays.asList(IS_BEFORE_INSERTION_FLAG | IS_NEXT_TO_SOFTCLIP_FLAG))}
+        };
+    }
+
+    @Test(dataProvider = "LIBSTest")
+    public void testLIBS(LIBSTest params) {
+        final int locus = 44367788;
+
+        SAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "read", 0, locus, params.readLength);
+        read.setReadBases(Utils.dupBytes((byte) 'A', params.readLength));
+        read.setBaseQualities(Utils.dupBytes((byte) '@', params.readLength));
+        read.setCigarString(params.cigar);
+
+        // create the iterator by state with the fake reads and fake records
+        li = makeLTBS(Arrays.asList(read), createTestReadProperties());
+
+        int offset = 0;
+        while ( li.hasNext() ) {
+            AlignmentContext alignmentContext = li.next();
+            ReadBackedPileup p = alignmentContext.getBasePileup();
+            Assert.assertTrue(p.getNumberOfElements() == 1);
+            PileupElement pe = p.iterator().next();
+
+            final int flag = params.flags.get(offset);
+            Assert.assertEquals(pe.isBeforeDeletedBase(), (flag & IS_BEFORE_DELETED_BASE_FLAG) != 0);
+            Assert.assertEquals(pe.isBeforeDeletionStart(), (flag & IS_BEFORE_DELETION_START_FLAG) != 0);
+            Assert.assertEquals(pe.isAfterDeletedBase(), (flag & IS_AFTER_DELETED_BASE_FLAG) != 0);
+            Assert.assertEquals(pe.isAfterDeletionEnd(), (flag & IS_AFTER_DELETION_END_FLAG) != 0);
+            Assert.assertEquals(pe.isBeforeInsertion(), (flag & IS_BEFORE_INSERTION_FLAG) != 0);
+            Assert.assertEquals(pe.isAfterInsertion(), (flag & IS_AFTER_INSERTION_FLAG) != 0);
+            Assert.assertEquals(pe.isNextToSoftClip(), (flag & IS_NEXT_TO_SOFTCLIP_FLAG) != 0);
+
+            Assert.assertEquals(pe.getOffset(), params.offsets.get(offset).intValue());
+
+            offset++;
+        }
+    }
+
+    ////////////////////////////////////////////////
+    // End comprehensive LIBS/PileupElement tests //
+    ////////////////////////////////////////////////
+
+
+    ///////////////////////////////////////
+    // Read State Manager Tests          //
+    ///////////////////////////////////////
+
+    private class PerSampleReadStateManagerTest extends TestDataProvider {
+        private List<Integer> readCountsPerAlignmentStart;
+        private List<SAMRecord> reads;
+        private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
+        private int removalInterval;
+
+        public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
+            super(PerSampleReadStateManagerTest.class);
+
+            this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
+            this.removalInterval = removalInterval;
+
+            reads = new ArrayList<SAMRecord>();
+            recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
+
+            setName(String.format("%s: readCountsPerAlignmentStart: %s  removalInterval: %d",
+                                  getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
+        }
+
+        public void run() {
+            LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
+            LocusIteratorByStateExperimental.ReadStateManager readStateManager =
+                    libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
+            LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
+                    readStateManager.new PerSampleReadStateManager();
+
+            makeReads();
+
+            for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
+                perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
+            }
+
+            // read state manager should have the right number of reads
+            Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
+
+            Iterator<SAMRecord> originalReadsIterator = reads.iterator();
+            Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
+            int recordStateCount = 0;
+            int numReadStatesRemoved = 0;
+
+            // Do a first-pass validation of the record state iteration by making sure we get back everything we
+            // put in, in the same order, doing any requested removals of read states along the way
+            while ( recordStateIterator.hasNext() ) {
+                LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
+                recordStateCount++;
+                SAMRecord readFromPerSampleReadStateManager = readState.getRead();
+
+                Assert.assertTrue(originalReadsIterator.hasNext());
+                SAMRecord originalRead = originalReadsIterator.next();
+
+                // The read we get back should be literally the same read in memory as we put in
+                Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
+
+                // If requested, remove a read state every removalInterval states
+                if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
+                    recordStateIterator.remove();
+                    numReadStatesRemoved++;
+                }
+            }
+
+            Assert.assertFalse(originalReadsIterator.hasNext());
+
+            // If we removed any read states, do a second pass through the read states to make sure the right
+            // states were removed
+            if ( numReadStatesRemoved > 0 ) {
+                Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
+
+                originalReadsIterator = reads.iterator();
+                recordStateIterator = perSampleReadStateManager.iterator();
+                int readCount = 0;
+                int readStateCount = 0;
+
+                // Match record states with the reads that should remain after removal
+                while ( recordStateIterator.hasNext() ) {
+                    LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
+                    readStateCount++;
+                    SAMRecord readFromPerSampleReadStateManager = readState.getRead();
+
+                    Assert.assertTrue(originalReadsIterator.hasNext());
+
+                    SAMRecord originalRead = originalReadsIterator.next();
+                    readCount++;
+
+                    if ( readCount % removalInterval == 0 ) {
+                        originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
+                        readCount++;
+                    }
+
+                    // The read we get back should be literally the same read in memory as we put in (after accounting for removals)
+                    Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
+                }
+
+                Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
+            }
+
+            // Allow memory used by this test to be reclaimed
+            readCountsPerAlignmentStart = null;
+            reads = null;
+            recordStatesByAlignmentStart = null;
+        }
+
+        private void makeReads() {
+            int alignmentStart = 1;
+
+            for ( int readsThisStack : readCountsPerAlignmentStart ) {
+                ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
+                ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
+
+                for ( SAMRecord read : stackReads ) {
+                    stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
+                }
+
+                reads.addAll(stackReads);
+                recordStatesByAlignmentStart.add(stackRecordStates);
+            }
+        }
+    }
+
+    @DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
+    public Object[][] createPerSampleReadStateManagerTests() {
+        for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
+                                                                     Arrays.asList(2),
+                                                                     Arrays.asList(10),
+                                                                     Arrays.asList(1, 1),
+                                                                     Arrays.asList(2, 2),
+                                                                     Arrays.asList(10, 10),
+                                                                     Arrays.asList(1, 10),
+                                                                     Arrays.asList(10, 1),
+                                                                     Arrays.asList(1, 1, 1),
+                                                                     Arrays.asList(2, 2, 2),
+                                                                     Arrays.asList(10, 10, 10),
+                                                                     Arrays.asList(1, 1, 1, 1, 1, 1),
+                                                                     Arrays.asList(10, 10, 10, 10, 10, 10),
+                                                                     Arrays.asList(1, 2, 10, 1, 2, 10)
+                                                                   ) ) {
+
+            for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
+                new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
+            }
+        }
+
+        return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
+    }
+
+    @Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
+    public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
+        logger.warn("Running test: " + test);
+
+        test.run();
+    }
+}
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/VerifyingSamIteratorUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/VerifyingSamIteratorUnitTest.java
@ -28,14 +28,12 @@ import net.sf.samtools.SAMFileHeader;
 import net.sf.samtools.SAMRecord;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
-import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;

-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;

@ -48,7 +46,6 @@ import java.util.List;
 */
 public class VerifyingSamIteratorUnitTest {
    private SAMFileHeader samFileHeader;
-    private GenomeLocParser genomeLocParser;

    @BeforeClass
    public void init() {
@ -58,8 +55,6 @@ public class VerifyingSamIteratorUnitTest {

        samFileHeader = new SAMFileHeader();
        samFileHeader.setSequenceDictionary(sequenceDictionary);
-
-        genomeLocParser = new GenomeLocParser(sequenceDictionary);
    }

    @Test
@ -68,7 +63,7 @@ public class VerifyingSamIteratorUnitTest {
        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(0).getSequenceIndex(),2,10);
        List<SAMRecord> reads = Arrays.asList(read1,read2);

-        VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
+        VerifyingSamIterator iterator = new VerifyingSamIterator(StingSAMIteratorAdapter.adapt(reads.iterator()));

        Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
        Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
@ -83,7 +78,7 @@ public class VerifyingSamIteratorUnitTest {
        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(1).getSequenceIndex(),1,10);
        List<SAMRecord> reads = Arrays.asList(read1,read2);

-        VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
+        VerifyingSamIterator iterator = new VerifyingSamIterator(StingSAMIteratorAdapter.adapt(reads.iterator()));

        Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
        Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
@ -98,7 +93,7 @@ public class VerifyingSamIteratorUnitTest {
        SAMRecord read2 = ArtificialSAMUtils.createArtificialRead(samFileHeader,"read2",getContig(0).getSequenceIndex(),1,10);
        List<SAMRecord> reads = Arrays.asList(read1,read2);

-        VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
+        VerifyingSamIterator iterator = new VerifyingSamIterator(StingSAMIteratorAdapter.adapt(reads.iterator()));

        Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
        Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
@ -116,7 +111,7 @@ public class VerifyingSamIteratorUnitTest {
        read1.setReferenceIndex(SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX);
        List<SAMRecord> reads = Arrays.asList(read1,read2);

-        VerifyingSamIterator iterator = new VerifyingSamIterator(genomeLocParser,StingSAMIteratorAdapter.adapt(reads.iterator()));
+        VerifyingSamIterator iterator = new VerifyingSamIterator(StingSAMIteratorAdapter.adapt(reads.iterator()));

        Assert.assertTrue(iterator.hasNext(),"Insufficient reads");
        Assert.assertSame(iterator.next(),read1,"Incorrect read in read 1 position");
--- a/Show More
+++ b/Show More