Bringing up scaffolding for integration of locus traversals by reference with Aaron's data source code.

Reverts to original TraverseByLociByReference behavior unless a special combination of command-line flags is used.

Lightly tested at best, and major flaws include:
- MicroManager is not doing MicroScheduling right now; it's driving the traversals.
- New database-ish data providers imply by their interface that they're stateless, but they're highly stateful.
- Using static objects to circumvent encapsulation.
- Code duplication is rampant.
- Plus more!


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@346 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2009-04-09 20:28:17 +00:00
parent 49b2622e3d
commit 8a1207e4db
12 changed files with 417 additions and 66 deletions

View File

@ -20,6 +20,7 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.executive.MicroManager;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
@ -65,6 +66,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
private TraversalEngine engine = null;
public boolean DEBUGGING = false;
public Boolean WALK_ALL_LOCI = false;
public Boolean ENABLE_THREADING = false;
/**
* An output file presented to the walker.
@ -81,6 +83,11 @@ public class GenomeAnalysisTK extends CommandLineProgram {
*/
public String outErrFileName = null;
/**
* How many threads should be allocated to this analysis.
*/
public int numThreads = 1;
/**
* The output stream, initialized from OUTFILENAME / OUTERRFILENAME.
* Used by the walker.
@ -127,7 +134,9 @@ public class GenomeAnalysisTK extends CommandLineProgram {
m_parser.addOptionalArg("out", "o", "An output file presented to the walker. Will overwrite contents if file exists.", "outFileName" );
m_parser.addOptionalArg("err", "e", "An error output file presented to the walker. Will overwrite contents if file exists.", "errFileName" );
m_parser.addOptionalArg("outerr", "oe", "A joint file for 'normal' and error output presented to the walker. Will overwrite contents if file exists.", "outErrFileName");
m_parser.addOptionalArg("numthreads", "nt", "How many threads should be allocated to running this analysis.", "numThreads");
m_parser.addOptionalFlag("enablethreading", "et", "Enable experimental threading support.", "ENABLE_THREADING");
//TODO: remove when walkers can ask for tracks
m_parser.addOptionalArg("mother", "MOM", "Mother's genotype (SAM pileup)", "MOTHER_GENOTYPE_FILE");
m_parser.addOptionalArg("father", "DAD", "Father's genotype (SAM pileup)", "FATHER_GENOTYPE_FILE");
@ -138,7 +147,7 @@ public class GenomeAnalysisTK extends CommandLineProgram {
.hasArgs()
.withDescription( "Bind rod with <name> and <type> to <file>" )
.create("B");
m_parser.addOptionalArg(rodBinder, "ROD_BINDINGS");
m_parser.addOptionalArg(rodBinder, "ROD_BINDINGS");
}
/**
@ -233,6 +242,15 @@ public class GenomeAnalysisTK extends CommandLineProgram {
throw new RuntimeException( "Unable to access walker", ex );
}
// Prepare the sort ordering w.r.t. the sequence dictionary
FastaSequenceFile2 refFile = null;
if (REF_FILE_ARG != null) {
refFile = new FastaSequenceFile2(REF_FILE_ARG);
GenomeLoc.setupRefContigOrdering(refFile);
}
MicroManager microManager = null;
// Try to get the walker specified
try {
LocusWalker<?, ?> walker = (LocusWalker<?, ?>) my_walker;
@ -245,9 +263,18 @@ public class GenomeAnalysisTK extends CommandLineProgram {
if ( walker.cannotHandleReads() )
Utils.scareUser(String.format("Analysis %s doesn't support SAM/BAM reads, but a read file %s was provided", Analysis_Name, INPUT_FILE));
if ( WALK_ALL_LOCI )
this.engine = new TraverseByLociByReference(INPUT_FILE, REF_FILE_ARG, rods);
if ( WALK_ALL_LOCI ) {
// TODO: Temporary debugging code. Activate the new debugging code only when the MicroManager
// is not filtered.
if( ENABLE_THREADING && REGION_STR == null ) {
logger.warn("Preliminary threading support enabled");
microManager = new MicroManager( INPUT_FILE, REF_FILE_ARG, numThreads );
this.engine = microManager.getTraversalEngine();
}
else {
this.engine = new TraverseByLociByReference(INPUT_FILE, REF_FILE_ARG, rods);
}
}
else
this.engine = new TraverseByLoci(INPUT_FILE, REF_FILE_ARG, rods);
}
@ -263,14 +290,13 @@ public class GenomeAnalysisTK extends CommandLineProgram {
final ReferenceSequenceFile refFile = ReferenceSequenceFileFactory.getReferenceSequenceFile(REF_FILE_ARG);
GenomeLoc.setupRefContigOrdering(refFile);
}
// Determine the validation stringency. Default to ValidationStringency.STRICT.
ValidationStringency strictness;
if (STRICTNESS_ARG == null) {
strictness = ValidationStringency.STRICT;
} else if (STRICTNESS_ARG.toLowerCase().equals("lenient")) {
strictness = ValidationStringency.LENIENT;
} else if (STRICTNESS_ARG.toLowerCase().equals("silent")) {
strictness = ValidationStringency.SILENT;
} else {
try {
strictness = Enum.valueOf(ValidationStringency.class, STRICTNESS_ARG);
}
catch( IllegalArgumentException ex ) {
strictness = ValidationStringency.STRICT;
}
logger.info("Strictness is " + strictness);
@ -304,7 +330,12 @@ public class GenomeAnalysisTK extends CommandLineProgram {
engine.setWalkOverAllSites(WALK_ALL_LOCI);
engine.initialize();
engine.traverse(my_walker);
if( microManager != null ) {
List<GenomeLoc> locations = GenomeLoc.parseGenomeLocs( REGION_STR );
microManager.execute( my_walker, locations );
}
else
engine.traverse(my_walker);
return 0;
}

View File

@ -0,0 +1,112 @@
package org.broadinstitute.sting.gatk.dataSources.providers;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.iterators.LocusIteratorByHanger;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.traversals.TraversalStatistics;
import org.apache.log4j.Logger;
import net.sf.samtools.SAMRecord;
import java.util.Iterator;
import java.util.ArrayList;
import edu.mit.broad.picard.filter.FilteringIterator;
import edu.mit.broad.picard.filter.SamRecordFilter;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 8, 2009
* Time: 3:00:28 PM
* To change this template use File | Settings | File Templates.
*/
public class LocusContextProvider {
    /** Read stream backing this provider, pre-filtered to drop un-handle-able reads. */
    private Iterator<SAMRecord> reads;

    // What's the last locus accessed? Used for sanity checking.
    private GenomeLoc lastLoc = null;

    /** Locus-by-locus view over the filtered read stream. */
    private LocusIterator loci;

    protected static Logger logger = Logger.getLogger(LocusContextProvider.class);

    /**
     * Create a provider over the given read stream.  Despite the provider-style
     * interface this object is highly stateful: the read stream is consumed in
     * order, so queries must arrive in increasing genomic order.
     *
     * @param reads Iterator over reads, assumed sorted in coordinate order.
     */
    public LocusContextProvider( Iterator<SAMRecord> reads ) {
        this.reads = new FilteringIterator(reads, new locusStreamFilterFunc());
        // prepare the iterator by loci from reads
        loci = new LocusIteratorByHanger(this.reads);
    }

    /**
     * Get the context (reads and offsets) covering the given single-base location.
     *
     * @param loc Single-base location to query; must be past all previous queries.
     * @return Context at the given location; an empty context (no reads) if no
     *         pileup covers the location.  Never null.
     */
    public LocusContext getLocusContext( GenomeLoc loc ) {
        // Precondition checks
        if( lastLoc != null && !loc.isPast( lastLoc ) )
            throw new RuntimeException( "Internal error: LocusContextProvider assumes that queries it receives are ordered." );
        if( (loc.getStop() - loc.getStart()) > 0 )
            throw new RuntimeException( "Internal error: LocusContextProviders currently require 1-base genomeLocs.");

        // jump to the first reference site
        LocusContext locusContext = advanceReadsToLoc( loci, loc );

        // if no locus context was found, create an empty locus
        if ( locusContext == null || locusContext.getLocation().compareTo( loc ) != 0 )
            locusContext = new LocusContext(loc, new ArrayList<SAMRecord>(), new ArrayList<Integer>());

        lastLoc = loc;
        return locusContext;
    }

    /**
     * Advance the locus iterator until it reaches (or passes) the target location.
     *
     * @param locusIter Iterator to advance.
     * @param target Location to seek.
     * @return Context at or just past the target; null if the iterator is exhausted
     *         before any locus is produced.
     */
    private LocusContext advanceReadsToLoc(LocusIterator locusIter, GenomeLoc target) {
        if ( ! locusIter.hasNext() )
            return null;

        LocusContext locus = locusIter.next();

        while ( ! target.containsP(locus.getLocation()) && locusIter.hasNext() ) {
            logger.debug(String.format(" current locus is %s vs %s => %d", locus.getLocation(), target, locus.getLocation().compareTo(target)));
            locus = locusIter.next();
        }

        logger.debug(String.format(" returning %s", locus.getLocation()));
        return locus;
    }

    /**
     * Class to filter out un-handle-able reads from the stream. We currently are skipping
     * unmapped reads, non-primary reads, unaligned reads, and those with indels. We should
     * really change this to handle indel containing reads.
     * TODO: This duplicates the filter in TraversalEngine; unify the two.
     */
    private class locusStreamFilterFunc implements SamRecordFilter {
        SAMRecord lastRead = null;
        public boolean filterOut(SAMRecord rec) {
            boolean result = false;
            String why = "";
            if (rec.getReadUnmappedFlag()) {
                TraversalStatistics.nUnmappedReads++;
                result = true;
                why = "Unmapped";
            } else if (rec.getNotPrimaryAlignmentFlag()) {
                TraversalStatistics.nNotPrimary++;
                result = true;
                why = "Not Primary";
            } else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
                TraversalStatistics.nBadAlignments++;
                result = true;
                why = "No alignment start";
            }

            // Keep the shared skipped/kept totals consistent with the equivalent
            // filter in TraversalEngine, so stats reported at traversal end are
            // correct when this provider drives the traversal.
            if (result) {
                TraversalStatistics.nSkippedReads++;
                //System.out.printf("  [filter] %s => %b %s", rec.getReadName(), result, why);
            } else {
                TraversalStatistics.nReads++;
            }
            return result;
        }
    }
}

View File

@ -0,0 +1,27 @@
package org.broadinstitute.sting.gatk.dataSources.providers;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 8, 2009
* Time: 5:01:37 PM
* To change this template use File | Settings | File Templates.
*/
public class ReferenceProvider {
    /** Forward-only iterator over the reference sequence. */
    private ReferenceIterator reference;

    /**
     * Create a provider over the given reference iterator.  The iterator only
     * seeks forward, so queries must arrive in increasing genomic order.
     *
     * @param reference Iterator over the reference sequence.
     */
    public ReferenceProvider( ReferenceIterator reference ) {
        this.reference = reference;
    }

    /**
     * Position the reference iterator at the given single-base location.
     *
     * @param genomeLoc Single-base location to query.
     * @return The reference iterator advanced to the requested location.
     */
    public ReferenceIterator getReferenceSequence( GenomeLoc genomeLoc ) {
        // Fixed copy-paste in the message: this is ReferenceProvider, not LocusContextProvider.
        if( (genomeLoc.getStop() - genomeLoc.getStart()) > 0 )
            throw new RuntimeException( "Internal error: ReferenceProviders currently require 1-base genomeLocs.");

        // jump to the first reference site
        return reference.seekForward(genomeLoc);
    }
}

View File

@ -1,22 +1,88 @@
package org.broadinstitute.sting.gatk.executive;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.FastaSequenceFile2;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategy;
import org.broadinstitute.sting.gatk.dataSources.shards.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SAMBAMDataSource;
import org.broadinstitute.sting.gatk.dataSources.simpleDataSources.SimpleDataSourceLoadException;
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.gatk.traversals.TraverseLociByReference;
import org.broadinstitute.sting.gatk.traversals.TraversalEngine;
import net.sf.samtools.SAMRecord;
import org.apache.log4j.Logger;
import java.util.List;
import java.util.Arrays;
import java.util.Iterator;
import java.io.File;
import java.io.IOException;
/**
* A micro-scheduling manager for N-way threaded execution of a traversal
*
*/
public class MicroManager {
private static long SHARD_SIZE = 5L;
public MicroManager( TraversalEngineExecutive TEfactory, // makes worker units
GenomeLoc[] locations, // list of work to do
int nThreadsToUse, // maximum number of threads to use to do the work
int initialChunkSize ) { // the initial chunk size for dividing up the work
// do a lot of work here to organize the computation
private File reads;
private FastaSequenceFile2 ref;
private TraverseLociByReference traversalEngine = null;
protected static Logger logger = Logger.getLogger(MicroManager.class);
public TraversalEngine getTraversalEngine() {
return traversalEngine;
}
public void execute() {
// actually divide up the work, create worker units, and send them off to do work
public MicroManager( File reads, // the reads file
File refFile, // the reference file driving the traversal
int nThreadsToUse ) { // maximum number of threads to use to do the work
this.reads = reads;
ref = new FastaSequenceFile2(refFile);
GenomeLoc.setupRefContigOrdering(ref);
traversalEngine = new TraverseLociByReference( reads, refFile, new java.util.ArrayList() );
}
public void execute( Walker walker, // the analysis technique to use.
List<GenomeLoc> locations ) { // list of work to do
ShardStrategy shardStrategy = ShardStrategyFactory.shatter( ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
ref.getSequenceDictionary(),
SHARD_SIZE );
Object accumulator = ((LocusWalker<?,?>)walker).reduceInit();
for(Shard shard: shardStrategy) {
Iterator<SAMRecord> readShard = null;
try {
SAMBAMDataSource dataSource = new SAMBAMDataSource( Arrays.asList( new String[] { reads.getCanonicalPath() } ) );
readShard = dataSource.seek( shard.getGenomeLoc() );
}
catch( SimpleDataSourceLoadException ex ) {
throw new RuntimeException( ex );
}
catch( IOException ex ) {
throw new RuntimeException( ex );
}
ReferenceProvider referenceProvider = new ReferenceProvider( new ReferenceIterator(ref) );
LocusContextProvider locusProvider = new LocusContextProvider( readShard );
accumulator = traversalEngine.traverse( walker, shard, referenceProvider, locusProvider, accumulator );
}
traversalEngine.printOnTraversalDone("loci", accumulator);
walker.onTraversalDone(accumulator);
}
}

View File

@ -1,31 +1,21 @@
package org.broadinstitute.sting.gatk.traversals;
import edu.mit.broad.picard.filter.FilteringIterator;
import edu.mit.broad.picard.filter.SamRecordFilter;
import edu.mit.broad.picard.reference.ReferenceSequence;
import edu.mit.broad.picard.sam.SamFileHeaderMerger;
import edu.mit.broad.picard.directed.IntervalList;
import edu.mit.broad.picard.util.Interval;
import net.sf.functionalj.Function1;
import net.sf.functionalj.FunctionN;
import net.sf.functionalj.Functions;
import net.sf.functionalj.reflect.JdkStdReflect;
import net.sf.functionalj.reflect.StdReflect;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.RuntimeIOException;
import net.sf.samtools.util.CloseableIterator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.*;
import java.io.*;
@ -66,16 +56,6 @@ public abstract class TraversalEngine {
protected FastaSequenceFile2 refFile = null; // todo: merge FastaSequenceFile2 into picard!
protected ReferenceIterator refIter = null;
// Number of records (loci, reads) we've processed
protected long nRecords = 0;
// How many reads have we processed, along with those skipped for various reasons
protected int nReads = 0;
protected int nSkippedReads = 0;
protected int nUnmappedReads = 0;
protected int nNotPrimary = 0;
protected int nBadAlignments = 0;
protected int nSkippedIndels = 0;
// Progress tracker for the sam file
protected FileProgressTracker samReadingTracker = null;
@ -277,7 +257,7 @@ public abstract class TraversalEngine {
* @param loc Current location
*/
public void printProgress(boolean mustPrint, final String type, GenomeLoc loc) {
final long nRecords = this.nRecords;
final long nRecords = TraversalStatistics.nRecords;
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
//System.out.printf("Cur = %d, last print = %d, elapsed=%.2f, nRecords=%d, met=%b%n", curTime, lastProgressPrintTime, elapsed, nRecords, maxElapsedIntervalForPrinting(curTime));
@ -307,17 +287,20 @@ public abstract class TraversalEngine {
* @param sum The reduce result of the traversal
* @param <T> ReduceType of the traversal
*/
protected <T> void printOnTraversalDone(final String type, T sum) {
public <T> void printOnTraversalDone(final String type, T sum) {
printProgress(true, type, null);
logger.info("Traversal reduce result is " + sum);
final long curTime = System.currentTimeMillis();
final double elapsed = (curTime - startTime) / 1000.0;
logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours%n", elapsed, elapsed / 60, elapsed / 3600));
logger.info(String.format("Traversal skipped %d reads out of %d total (%.2f%%)", nSkippedReads, nReads, (nSkippedReads * 100.0) / nReads));
logger.info(String.format(" -> %d unmapped reads", nUnmappedReads));
logger.info(String.format(" -> %d non-primary reads", nNotPrimary));
logger.info(String.format(" -> %d reads with bad alignments", nBadAlignments));
logger.info(String.format(" -> %d reads with indels", nSkippedIndels));
logger.info(String.format("Traversal skipped %d reads out of %d total (%.2f%%)",
TraversalStatistics.nSkippedReads,
TraversalStatistics.nReads,
(TraversalStatistics.nSkippedReads * 100.0) / TraversalStatistics.nReads));
logger.info(String.format(" -> %d unmapped reads", TraversalStatistics.nUnmappedReads));
logger.info(String.format(" -> %d non-primary reads", TraversalStatistics.nNotPrimary));
logger.info(String.format(" -> %d reads with bad alignments", TraversalStatistics.nBadAlignments));
logger.info(String.format(" -> %d reads with indels", TraversalStatistics.nSkippedIndels));
}
// --------------------------------------------------------------------------------------------------------------
@ -520,27 +503,27 @@ public abstract class TraversalEngine {
boolean result = false;
String why = "";
if (rec.getReadUnmappedFlag()) {
nUnmappedReads++;
TraversalStatistics.nUnmappedReads++;
result = true;
why = "Unmapped";
} else if (rec.getNotPrimaryAlignmentFlag()) {
nNotPrimary++;
TraversalStatistics.nNotPrimary++;
result = true;
why = "Not Primary";
} else if (rec.getAlignmentStart() == SAMRecord.NO_ALIGNMENT_START) {
nBadAlignments++;
TraversalStatistics.nBadAlignments++;
result = true;
why = "No alignment start";
}
}
else {
result = false;
}
if (result) {
nSkippedReads++;
TraversalStatistics.nSkippedReads++;
//System.out.printf(" [filter] %s => %b %s", rec.getReadName(), result, why);
} else {
nReads++;
TraversalStatistics.nReads++;
}
return result;
}

View File

@ -0,0 +1,36 @@
package org.broadinstitute.sting.gatk.traversals;
/**
* Created by IntelliJ IDEA.
* User: hanna
* Date: Apr 8, 2009
* Time: 4:13:40 PM
*
* Holds a bunch of basic information about the traversal.
* TODO: Make this a class that can be passed around from the TraversalEngine to other entries that want to update it.
*/
public class TraversalStatistics {
    /** Number of records (loci, reads) processed so far. */
    public static long nRecords = 0;

    /** Reads kept and handed to the traversal. */
    public static int nReads = 0;
    /** Total reads skipped, for any of the reasons counted below. */
    public static int nSkippedReads = 0;
    /** Reads skipped because they were unmapped. */
    public static int nUnmappedReads = 0;
    /** Reads skipped because they were not the primary alignment. */
    public static int nNotPrimary = 0;
    /** Reads skipped because they had no alignment start. */
    public static int nBadAlignments = 0;
    /** Reads skipped because they contained indels. */
    public static int nSkippedIndels = 0;

    /** Zero every counter, e.g. before starting a fresh traversal. */
    public static void reset() {
        nRecords = 0L;
        nReads = 0;
        nSkippedReads = 0;
        nUnmappedReads = 0;
        nNotPrimary = 0;
        nBadAlignments = 0;
        nSkippedIndels = 0;
    }
}

View File

@ -107,7 +107,7 @@ public class TraverseByLoci extends TraversalEngine {
boolean done = false;
LocusIterator iter = new LocusIteratorByHanger(filterIter);
while (iter.hasNext() && !done) {
this.nRecords++;
TraversalStatistics.nRecords++;
// actually get the read and hand it to the walker
LocusContext locus = iter.next();
@ -127,8 +127,8 @@ public class TraverseByLoci extends TraversalEngine {
//System.out.format("Working at %s\n", locus.getLocation().toString());
if (this.maxReads > 0 && this.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + this.nRecords));
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
}

View File

@ -61,7 +61,7 @@ public class TraverseByLociByReference extends TraverseByLoci {
while ( interval.containsP(refSite.getLocation()) && ! done ) {
logger.debug(String.format(" LocusFromReads is %s", locusFromReads == null ? null : locusFromReads.getLocation()));
this.nRecords++;
TraversalStatistics.nRecords++;
GenomeLoc current = refSite.getLocation();
// Iterate forward to get all reference ordered data covering this locus
@ -82,8 +82,8 @@ public class TraverseByLociByReference extends TraverseByLoci {
locus.downsampleToCoverage(downsamplingCoverage);
sum = walkAtLocus( walker, sum, locus, refSite, tracker );
if (this.maxReads > 0 && this.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + this.nRecords));
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
}

View File

@ -76,7 +76,7 @@ public class TraverseByReads extends TraversalEngine {
// copy the locations here in case we ever want to use the full list again later and so that we can remove efficiently
LinkedList notYetTraversedLocations = new LinkedList(locations);
while (samReadIter.hasNext() && !done) {
this.nRecords++;
TraversalStatistics.nRecords++;
// get the next read
final SAMRecord read = samReadIter.next();
@ -102,8 +102,8 @@ public class TraverseByReads extends TraversalEngine {
sum = walker.reduce(x, sum);
}
if (this.maxReads > 0 && this.nRecords > this.maxReads) {
logger.warn(String.format(("Maximum number of reads encountered, terminating traversal " + this.nRecords)));
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
logger.warn(String.format(("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords)));
done = true;
}
}

View File

@ -87,7 +87,7 @@ public class TraverseByReference extends TraverseByLoci {
// We keep processing while the next reference location is within the interval
while ( (interval == null || interval.containsP(refSite.getLocation())) && ! done ) {
this.nRecords++;
TraversalStatistics.nRecords++;
GenomeLoc current = refSite.getLocation();
// Iterate forward to get all reference ordered data covering this locus
@ -97,8 +97,8 @@ public class TraverseByReference extends TraverseByLoci {
locus.setReferenceContig(refSite.getCurrentContig());
sum = walkAtLocus( walker, sum, locus, refSite, tracker );
if (this.maxReads > 0 && this.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + this.nRecords));
if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
logger.warn(String.format("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords));
done = true;
}

View File

@ -0,0 +1,93 @@
package org.broadinstitute.sting.gatk.traversals;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
import org.broadinstitute.sting.gatk.dataSources.providers.LocusContextProvider;
import org.broadinstitute.sting.gatk.dataSources.providers.ReferenceProvider;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.iterators.ReferenceIterator;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.Utils;
import org.apache.log4j.Logger;
import java.util.List;
import java.util.ArrayList;
import java.io.File;
/**
* A simple, short-term solution to iterating over all reference positions over a series of
* genomic locations. Simply overloads the superclass traverse function to go over the entire
* interval's reference positions.
* mhanna - Added better data source integration.
* TODO: Gain confidence in this implementation and remove the original.
*/
public class TraverseLociByReference extends TraversalEngine {
    /**
     * our log, which we want to capture anything from this class
     * (registered under this class, not TraversalEngine, so log output is attributable)
     */
    protected static Logger logger = Logger.getLogger(TraverseLociByReference.class);

    /**
     * @param reads SAM/BAM file supplying the reads.
     * @param ref Reference file driving the traversal.
     * @param rods Reference-ordered data to present at each locus.
     */
    public TraverseLociByReference(File reads, File ref, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods) {
        super( reads, ref, rods );
    }

    /**
     * Location-list traversal is not supported by this traversal type; callers
     * must use the shard-based overload below.
     *
     * @throws UnsupportedOperationException always (after warning on an empty location list).
     */
    public <M,T> T traverse(Walker<M,T> walker, ArrayList<GenomeLoc> locations) {
        if ( locations.isEmpty() )
            Utils.scareUser("Requested all locations be processed without providing locations to be processed!");
        throw new UnsupportedOperationException("This traversal type not supported by TraverseLociByReference");
    }

    /**
     * Walk every reference position within the shard's interval, presenting the
     * reference base, reference-ordered data and locus context to the walker at
     * each site, folding the walker's map results into the running reduce.
     *
     * @param walker Analysis to run; must be a LocusWalker.
     * @param shard Interval of the genome to traverse.
     * @param referenceProvider Source of reference bases, queried in order.
     * @param locusProvider Source of read pileups, queried in order.
     * @param sum Reduce value accumulated so far.
     * @return The reduce value after processing every position in the shard.
     * @throws IllegalArgumentException if the walker is not a LocusWalker.
     */
    public <M,T> T traverse( Walker<M,T> walker,
                             Shard shard,
                             ReferenceProvider referenceProvider,
                             LocusContextProvider locusProvider,
                             T sum ) {
        logger.debug(String.format("TraverseLociByReference.traverse Genomic interval is %s", shard.getGenomeLoc()));

        if ( !(walker instanceof LocusWalker) )
            throw new IllegalArgumentException("Walker isn't a loci walker!");
        LocusWalker<M, T> locusWalker = (LocusWalker<M, T>)walker;

        GenomeLoc loc = shard.getGenomeLoc();

        // We keep processing while the next reference location is within the interval
        for( long pos = loc.getStart(); pos <= loc.getStop(); pos++ ) {
            GenomeLoc site = new GenomeLoc( loc.getContig(), pos );
            TraversalStatistics.nRecords++;

            // Iterate forward to get all reference ordered data covering this locus
            final RefMetaDataTracker tracker = getReferenceOrderedDataAtLocus( site );
            ReferenceIterator refSite = referenceProvider.getReferenceSequence( site );
            LocusContext locus = locusProvider.getLocusContext( site );
            locus.setReferenceContig( refSite.getCurrentContig() );

            if ( DOWNSAMPLE_BY_COVERAGE )
                locus.downsampleToCoverage(downsamplingCoverage);

            final char refBase = refSite.getBaseAsChar();

            // Standard filter/map/reduce at this site.
            final boolean keepMeP = locusWalker.filter(tracker, refBase, locus);
            if (keepMeP) {
                M x = locusWalker.map(tracker, refBase, locus);
                sum = locusWalker.reduce(x, sum);
            }

            if (this.maxReads > 0 && TraversalStatistics.nRecords > this.maxReads) {
                // Plain concatenation: the old String.format call had no format
                // specifiers and would throw if the text ever contained '%'.
                logger.warn("Maximum number of reads encountered, terminating traversal " + TraversalStatistics.nRecords);
                break;
            }

            printProgress("loci", locus.getLocation());
        }
        return sum;
    }
}

View File

@ -206,10 +206,13 @@ public class GenomeLoc implements Comparable<GenomeLoc> {
* Useful utility function that parses a location string into a coordinate-order sorted
* array of GenomeLoc objects
*
* @param str
* @param str String representation of genome locs. Null string corresponds to no filter.
* @return Array of GenomeLoc objects corresponding to the locations in the string, sorted by coordinate order
*/
public static ArrayList<GenomeLoc> parseGenomeLocs(final String str) {
// Null string means no filter.
if( str == null ) return null;
// Of the form: loc1;loc2;...
// Where each locN can be:
// 'chr2', 'chr2:1000000' or 'chr2:1,000,000-2,000,000'