Added back end code support for Sharding based on genomic location for reads. Changed the sharding

code to take GenomeLocSortedSet instead of a List<GenomeLoc>, and added a bunch of much simpler and cleaner test cases. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@816 348d0f76-0448-11de-a6fe-93d51630548a
2009-05-26 20:57:46 +00:00 · 2009-05-26 20:57:46 +00:00 · d994544c47
parent 4edcdffe45
commit d994544c47
25 changed files with 732 additions and 527 deletions
--- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@ -13,6 +13,7 @@ import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.StingException;
 import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.cmdLine.ArgumentException;

 import java.util.ArrayList;
@ -110,7 +111,10 @@ public class GenomeAnalysisEngine {
        genericEngineSetup(strictness);

        // parse out any genomic location they've provided
-        List<GenomeLoc> locs = setupIntervalRegion();
+        List<GenomeLoc> locationsList = setupIntervalRegion();
+        GenomeLocSortedSet locs = null;
+        if (locationsList != null)
+            locs = GenomeLocSortedSet.createSetFromList(locationsList);

        // excute the microscheduler
        microScheduler.execute(my_walker, locs);
@ -192,7 +196,7 @@ public class GenomeAnalysisEngine {

        engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));

-        // we default interval files over the genome region strin
+        // we default interval files over the genome region string
        if (argCollection.intervals != null) {
            engine.setLocation(setupIntervalRegion());
        }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ExpGrowthLocusShardStrategy.java
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.dataSources.shards;

 import net.sf.samtools.SAMSequenceDictionary;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.util.List;

@ -66,7 +67,7 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
     * @param startSize the starting size of the shard
     * @param lst       locations to iterate from
     */
-    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, List<GenomeLoc> lst) {
+    ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
        super(dic, lst);
        this.baseSize = startSize;
        this.currentExp = 0;
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShard.java
@ -0,0 +1,61 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *         <p/>
+ *         Class IntervalReadShard
+ *         <p/>
+ *         This is the read shard that knows about genomic intervals: it wraps the
+ *         single GenomeLoc it was constructed from and reports itself as a READ shard.
+ */
+public class IntervalReadShard implements Shard {
+
+    /** the single genomic location covered by this shard (a clone of the location given to the constructor) */
+    private GenomeLoc mSet;
+
+    IntervalReadShard(GenomeLoc myLocation) { // package-private; keeps a clone rather than the caller's instance
+        mSet = myLocation.clone();
+    }
+
+    /** @return the genome location represented by this shard (the stored instance itself, not a fresh copy) */
+    public GenomeLoc getGenomeLoc() {
+        return mSet;
+    }
+
+    /**
+     * returns the type of shard, which is always READ for this class
+     *
+     * @return READ, indicating the shard type
+     */
+    public ShardType getShardType() {
+        return Shard.ShardType.READ;
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LinearLocusShardStrategy.java
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.dataSources.shards;

 import net.sf.samtools.SAMSequenceDictionary;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.util.List;

@ -63,7 +64,7 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
     * @param startSize the starting size of the shard
     * @param lst locations to iterate from
     */
-    LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, List<GenomeLoc> lst) {
+    LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
        super(dic, lst);
        this.nextShardSize = startSize;
    }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategy.java
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.dataSources.shards;

 import net.sf.samtools.SAMSequenceDictionary;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.util.List;

@ -29,17 +30,17 @@ import java.util.List;
 * <p/>
 * Class LocusWindowShardStrategy
 * <p/>
- * This function knows how to shard on a genome loc boundry.  It guarantee's
- * a one-to-one mapping between a GenomeLoc and hte 
+ * This strategy knows how to shard on a genome loc boundary.  It guarantees
+ * a one-to-one mapping between a GenomeLoc and a shard.
 */
-public class IntervalShardStrategy extends LocusShardStrategy {
+public class LocusIntervalShardStrategy extends LocusShardStrategy {
    /**
     * the constructor, taking a seq dictionary to parse out contigs
     *
     * @param dic       the seq dictionary
     * @param intervals file
     */
-    IntervalShardStrategy(SAMSequenceDictionary dic, List<GenomeLoc> intervals) {
+    LocusIntervalShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
        super(dic, intervals);
    }

--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShard.java
@ -26,7 +26,7 @@ import org.broadinstitute.sting.utils.GenomeLoc;
 * <p/>
 * Class Shard
 * <p/>
- * This is the base class for shards.  Right now it does little more then
+ * This is the base class for locus shards.  Right now it does little more then
 * wrap GenomeLoc (actually nothing more), but it's good to have the class
 * in place so it's easier to change guts later.
 */
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/LocusShardStrategy.java
@ -3,6 +3,8 @@ package org.broadinstitute.sting.gatk.dataSources.shards;
 import net.sf.samtools.SAMSequenceDictionary;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.StingException;

 import java.util.Iterator;
 import java.util.List;
@ -25,11 +27,6 @@ import java.util.List;
 /**
 * @author aaron
 * @version 1.0
- * @date Apr 6, 2009
- * <p/>
- * Interface Shard
- * <p/>
- * The shard interface, which controls how data is divided for loci
 */
 public abstract class LocusShardStrategy implements ShardStrategy {

@ -50,10 +47,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
    private boolean nextContig = false;

    /** our interal list * */
-    private List<GenomeLoc> intervals = null;
-
-    /** our interal list * */
-    private int currentInterval = -1;
+    private GenomeLocSortedSet intervals = null;

    /** our log, which we want to capture anything from this class */
    private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
@ -92,15 +86,15 @@ public abstract class LocusShardStrategy implements ShardStrategy {
     * @param dic       the seq dictionary
     * @param intervals file
     */
-    LocusShardStrategy(SAMSequenceDictionary dic, List<GenomeLoc> intervals) {
+    LocusShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
        this.dic = dic;
-        this.intervals = intervals;
-        this.currentInterval = 0;
+        this.intervals = intervals.clone();
        // set the starting point to the beginning interval
        if (intervals.size() < 1) {
            throw new IllegalArgumentException("Interval files must contain at least one interval");
        }
-        mLoc = new GenomeLoc(intervals.get(0).getContig(),intervals.get(0).getStart()-1,intervals.get(0).getStart()-1);
+        GenomeLoc loc = intervals.iterator().next();
+        mLoc = new GenomeLoc(loc.getContig(), loc.getStart() - 1, loc.getStart() - 1);
        if (dic.getSequences().size() > 0) {
            nextContig = true;
        }
@ -139,11 +133,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
        long proposedSize = nextShardSize();
        long nextStart = mLoc.getStop() + 1;

-        // if we don't have an interval file, use the non interval based approach.  Simple, eh?
+        // if we don't have an interval set, use the non interval based approach.  Simple, eh?
        if (this.intervals == null) {
            return nonIntervaledNext(length, proposedSize, nextStart);
        } else {
-            return intervaledNext(proposedSize, nextStart);
+            return intervaledNext(proposedSize);
        }

    }
@ -152,36 +146,24 @@ public abstract class LocusShardStrategy implements ShardStrategy {
     * Interval based next processing
     *
     * @param proposedSize the proposed size
-     * @param nextStart    where we start from
+     *
     * @return the shard that represents this data
     */
-    private Shard intervaledNext(long proposedSize, long nextStart) {
-        // get the current genome location
-        GenomeLoc loc = intervals.get(currentInterval);
-        if (nextStart + proposedSize >= loc.getStop()) {
-            // we need to get the rest of the current loc in a shard (return it), and move to the next location
-            proposedSize = loc.getStop() - nextStart;
-            lastGenomeLocSize = proposedSize;
+    private Shard intervaledNext(long proposedSize) { // NOTE(review): unlike the removed body, this version never updates mLoc or lastGenomeLocSize - confirm nothing still relies on them
+        if ((this.intervals == null) || (intervals.isEmpty())) {
+            throw new StingException("LocusShardStrategy: genomic regions list is empty in next() function.");
+        }

-            // the next sequence should start at the begining of the next contig
-            Shard ret = LocusShard.toShard(new GenomeLoc(intervals.get(currentInterval).getContigIndex(), nextStart, nextStart + proposedSize));
-
-            ++currentInterval;
-            if (intervals.size() > currentInterval) {
-                mLoc = new GenomeLoc(intervals.get(currentInterval).getContigIndex(), intervals.get(currentInterval).getStart() - 1, intervals.get(currentInterval).getStart() - 1);
-            }
-            return ret;// return
+        // get the first region in the set; it is consumed (wholly or partly) below
+        GenomeLoc loc = intervals.iterator().next();

+        if (loc.getStop() - loc.getStart() <= proposedSize) { // NOTE(review): stop - start is one less than an inclusive length, so a region of proposedSize + 1 positions passes - confirm intended
+            intervals.removeRegion(loc);
+            return new IntervalReadShard(loc); // NOTE(review): IntervalReadShard reports ShardType.READ, but this is a locus strategy - confirm LocusShard was not intended
         } else {
-            // we need to move the next interval
-            lastGenomeLocSize = proposedSize;
-
-            // the next sequence should start at the begining of the next contig
-            Shard ret = LocusShard.toShard(new GenomeLoc(intervals.get(currentInterval).getContigIndex(), nextStart, nextStart + proposedSize - 1));
-
-            mLoc = new GenomeLoc(intervals.get(currentInterval).getContigIndex(), nextStart, nextStart + proposedSize - 1);
-
-            return ret;// return
+            GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStart() + proposedSize - 1); // leading proposedSize-wide slice of the region
+            intervals.removeRegion(subLoc); // presumably trims subLoc off the stored region so the remainder is served next - verify against GenomeLocSortedSet
+            return new IntervalReadShard(subLoc); // NOTE(review): same READ-typed shard concern as the branch above
         }
     }

@ -191,6 +173,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
     * @param length       the length of the contig
     * @param proposedSize the proposed size
     * @param nextStart    the next start location
+     *
     * @return the shard to return to the user
     */
    private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) {
@ -241,11 +224,11 @@ public abstract class LocusShardStrategy implements ShardStrategy {
     * @return
     */
    public boolean hasNext() {
-        // if we don't have an interval file, use the non interval based approach.  Simple, eh?
+        // if we don't have an interval file, use the non interval based approach.
        if (this.intervals == null) {
            return nextContig;
        } else {
-            return (this.currentInterval < this.intervals.size());
+            return (this.intervals.size() > 0);
        }
    }

@ -267,13 +250,14 @@ public abstract class LocusShardStrategy implements ShardStrategy {
    /**
     * this allows a shard strategy to get the current interval.  It's kind of a hack, but for the
     * locusWindowShardStrategy it was the best approach.
+     *
     * @return
     */
    protected GenomeLoc getCurrentInterval() {
-        if (this.intervals == null || currentInterval < 0) {
+        if (this.intervals == null || intervals.size() < 1) {
            return null;
        }
-        return intervals.get(currentInterval);
+        return intervals.iterator().next();
    }

 }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategy.java
@ -0,0 +1,118 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import net.sf.samtools.SAMSequenceDictionary;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.StingException;
+
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ *
+ * User: aaron
+ * Date: May 21, 2009
+ * Time: 4:13:53 PM
+ *
+ * The Broad Institute
+ * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
+ * This software and its documentation are copyright 2009 by the
+ * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
+ *
+ * This software is supplied without any warranty or guaranteed support whatsoever. Neither
+ * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
+ *
+ */
+
+
+/**
+ * @author aaron
+ *         <p/>
+ *         Class ReadIntervalShardStrategy
+ *         <p/>
+ *         Implements the sharding strategy for reads, given a set
+ *         of genomic locations.  Shards returned will be bounded by the interval,
+ *         but each provided interval may be split into a number of smaller regions.
+ */
+public class ReadIntervalShardStrategy implements ShardStrategy {
+
+    /** our private clone of the genomic locations they'd like to shard over; regions are removed from it as shards are handed out */
+    private final GenomeLocSortedSet regions;
+
+    /** their preferred size of the shard; it can be changed between shards via adjustNextShardSize */
+    private long size;
+
+    /** the sequence dictionary supplied at construction (stored, but not read by any method in this class) */
+    private final SAMSequenceDictionary dict;
+
+    /**
+     * change the recommended shard size for the next shard we generate.  The code will do its
+     * best to respect this value, but there are no guarantees.
+     *
+     * @param size the next recommended shard size.
+     */
+    public void adjustNextShardSize(long size) {
+        this.size = size;
+    }
+
+    /**
+     * the default constructor; clones the supplied locations so the caller's set is not consumed
+     *
+     * @param dict      the sequence dictionary to use
+     * @param size      the preferred shard size, compared against the span (stop - start) of each region
+     * @param locations the genomic regions to shard over; a StingException is thrown if null or empty
+     */
+    ReadIntervalShardStrategy(SAMSequenceDictionary dict, long size, GenomeLocSortedSet locations) {
+        if (locations == null || locations.isEmpty()) {
+            throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty.");
+        }
+        this.regions = locations.clone();
+        this.size = size;
+        this.dict = dict;
+    }
+
+    /**
+     * returns true if there are additional shards, i.e. the remaining region set is not empty
+     * @return false if we're done processing shards
+     */
+    public boolean hasNext() {
+        return (!regions.isEmpty());
+    }
+
+    /**
+     * gets the next Shard, taken from the front of the first remaining region.  A region whose
+     * span is within the current size is returned whole; otherwise only its leading slice is returned.
+     * Throws a StingException when no regions remain.
+     * @return the next shard
+     */
+    public Shard next() {
+        if ((this.regions == null) || (regions.isEmpty())) {
+            throw new StingException("ReadIntervalShardStrategy: genomic regions list is empty in next() function.");
+        }
+
+        // get the first region in the set
+        GenomeLoc loc = regions.iterator().next();
+
+        if (loc.getStop() - loc.getStart() <= this.size) { // NOTE(review): stop - start is one less than an inclusive length, so a region of size + 1 positions is returned whole - confirm intended
+            regions.removeRegion(loc);
+            return new IntervalReadShard(loc);
+        } else {
+            GenomeLoc subLoc = new GenomeLoc(loc.getContigIndex(),loc.getStart(),loc.getStart()+size-1); // leading slice of the region, size positions wide
+            regions.removeRegion(subLoc); // presumably trims subLoc off the stored region so the remainder is served by the next call - verify against GenomeLocSortedSet
+            return new IntervalReadShard(subLoc);
+        }
+        
+    }
+
+    /**
+     * we don't support the remove command; always throws UnsupportedOperationException
+     */
+    public void remove() {
+        throw new UnsupportedOperationException("ShardStrategies don't support remove()");
+    }
+
+    /**
+     * makes the ReadIntervalShardStrategy iterable, i.e. usable in a for loop.
+     * @return this strategy itself, so iterating consumes the remaining shards
+     */
+    public Iterator<Shard> iterator() {
+        return this;
+    }
+}
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShard.java
@ -21,19 +21,20 @@ import org.broadinstitute.sting.utils.GenomeLoc;

 /**
 * @author aaron
- * @version 1.0
- * @date Apr 10, 2009
- * <p/>
- * Class ReadShard
- * <p/>
- * A class for sharded reads.
+ *         <p/>
+ *         ReadShard
+ *         <p/>
+ *         the base class for read shards.
 */
 public class ReadShard implements Shard {

    // the count of the reads we want to copy off
    private int size = 0;

-    // this is going to get gross
+    /**
+     * our tie-in to the shard strategy.  This allows us to signal to the shard
+     * strategy that we've finished processing, so it can indicate that we're out of reads
+     */
    private final ReadShardStrategy str;

    // the reference back to our read shard strategy
@ -63,7 +64,7 @@ public class ReadShard implements Shard {

    /** @return the genome location represented by this shard */
    public GenomeLoc getGenomeLoc() {
-        throw new UnsupportedOperationException("Reads based sharding isn't genome loc aware");
+        throw new UnsupportedOperationException("ReadShard isn't genome loc aware");
    }

    /** @return the genome location represented by this shard */
@ -71,7 +72,10 @@ public class ReadShard implements Shard {
        return size;
    }

-
+    /**
+     * this method is used as a backend, to signal to the sharding strategy that we've
+     * finished processing.  When we move to a more read-aware bam system this method could disappear. 
+     */
    public void signalDone() {
        strat.signalDone();
    }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ReadShardStrategy.java
@ -5,10 +5,6 @@ import net.sf.samtools.SAMSequenceDictionary;
 import java.util.Iterator;

 /**
- *
- * User: aaron
- * Date: Apr 14, 2009
- * Time: 1:34:28 PM
 *
 * The Broad Institute
 * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
@ -28,7 +24,8 @@ import java.util.Iterator;
 * <p/>
 * Class ReadShardStrategy
 * <p/>
- * A descriptions should go here. Blame aaron if it's missing.
+ * The sharding strategy for reads using a simple counting mechanism.  Each read shard
+ * has a specific number of reads (default to 100K) which is configured in the constructor.
 */
 public class ReadShardStrategy implements ShardStrategy {

@ -46,7 +43,7 @@ public class ReadShardStrategy implements ShardStrategy {

    /**
     * the default constructor
-     * @param dic the dictionary
+     * @param dic the sequence dictionary to use
     * @param size the read count to iterate over
     */
    ReadShardStrategy(SAMSequenceDictionary dic, long size) {
@ -63,7 +60,7 @@ public class ReadShardStrategy implements ShardStrategy {
    }

    public Shard next() {
-        return new ReadShard((int)readCount, this);  
+        return new ReadShard((int)readCount, this);                                                                                                                      
    }

    public void remove() {
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactory.java
@ -4,6 +4,7 @@ import net.sf.samtools.SAMSequenceDictionary;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.util.List;

@ -65,25 +66,6 @@ public class ShardStrategyFactory {

    }

-    /**
-     * convert between types
-     *
-     * @param strat       the strategy
-     * @param convertFrom convert from this strategy
-     * @return
-     */
-    static public ShardStrategy transitionToShardStrategy(SHATTER_STRATEGY strat, LocusShardStrategy convertFrom) {
-        switch (strat) {
-            case LINEAR:
-                return new LinearLocusShardStrategy(convertFrom);
-            case EXPONENTIAL:
-                return new ExpGrowthLocusShardStrategy(convertFrom);
-            default:
-                throw new StingException("Strategy: " + strat + " isn't implemented");
-
-        }
-    }
-

    /**
     * get a new shatter strategy
@ -93,31 +75,20 @@ public class ShardStrategyFactory {
     * @param startingSize the starting size
     * @return
     */
-    static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, List<GenomeLoc> lst) {
+    static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
        switch (strat) {
            case LINEAR:
                return new LinearLocusShardStrategy(dic, startingSize, lst);
            case EXPONENTIAL:
                return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
            case READS:
-                // return new ReadShardStrategy(dic, startingSize);
-                throw new StingException("Strategy: " + strat + " isn't implemented for intervals");
+                return new ReadIntervalShardStrategy(dic, startingSize, lst);
            case INTERVAL:
-                return new IntervalShardStrategy(dic, lst);
+                return new LocusIntervalShardStrategy(dic, lst);
            default:
                throw new StingException("Strategy: " + strat + " isn't implemented");
        }

    }

-    /**
-     * setup a reads shattering strategy
-     *
-     * @param readCount the number of reads to include in each shard
-     * @return
-     */
-    static public ShardStrategy shatterByReadCount(SAMSequenceDictionary dic, long readCount) {
-        return new ReadShardStrategy(dic, readCount);
-    }
-
 }
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/ReferenceDataSource.java
@ -1,73 +0,0 @@
-package org.broadinstitute.sting.gatk.dataSources.simpleDataSources;
-
-import org.broadinstitute.sting.gatk.dataSources.shards.Shard;
-import org.broadinstitute.sting.gatk.iterators.BoundedReferenceIterator;
-import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
-
-import java.io.File;
-import java.io.FileNotFoundException;
-
-/**
- *
- * User: aaron
- * Date: Apr 6, 2009
- * Time: 3:55:21 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
-
-/**
- * @author aaron
- * @version 1.0
- * @date Apr 6, 2009
- * <p/>
- * Class ReferenceDataSource
- * <p/>
- * A descriptions should go here. Blame aaron if it's missing.
- */
-public class ReferenceDataSource implements SimpleDataSource {
-
-    final protected IndexedFastaSequenceFile refFile;
-
-    /**
-     * Query the data source for a region of interest, specified by the genome location.
-     * The iterator will generate successive calls
-     *
-     * @param shard the genome location to extract data for
-     * @return an iterator of the appropriate type, that is limited by the region
-     */
-    public BoundedReferenceIterator seek(Shard shard) {
-        if (shard.getShardType() == Shard.ShardType.LOCUS) {
-            BoundedReferenceIterator ret = new BoundedReferenceIterator(refFile, shard.getGenomeLoc());
-            return ret;
-        } else {
-            throw new StingException("ReferenceDataSource can only take LocusShards");
-        }
-
-    }
-
-    public ReferenceDataSource(String refFileName) throws SimpleDataSourceLoadException {
-        if (refFileName == null) {
-            throw new SimpleDataSourceLoadException("ReferenceDataSource: refFileName passed in is null");
-        }
-        File infile = new File(refFileName);
-        if (!infile.canRead()) {
-            throw new SimpleDataSourceLoadException("ReferenceDataSource: Unable to load file: " + refFileName);
-        }
-        try {
-            refFile = new IndexedFastaSequenceFile(new File(refFileName));
-        }
-        catch( FileNotFoundException ex ) {
-            throw new SimpleDataSourceLoadException( "Unable to find reference file", ex );
-        }
-    }
-}
--- a/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
+++ b/java/src/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMDataSource.java
@ -35,9 +35,7 @@ import java.util.List;
 * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
 */
 public class SAMDataSource implements SimpleDataSource {
-    /**
-     * Backing support for reads.
-     */
+    /** Backing support for reads. */
    private Reads reads = null;

    /** our SAM data files */
@ -52,9 +50,7 @@ public class SAMDataSource implements SimpleDataSource {
    // our list of readers
    private final List<File> samFileList = new ArrayList<File>();

-    /**
-     * SAM header file.
-     */
+    /** SAM header file. */
    private final SAMFileHeader header;

    // used for the reads case, the last count of reads retrieved
@ -90,14 +86,14 @@ public class SAMDataSource implements SimpleDataSource {

        }

-        header  = createHeaderMerger().getMergedHeader();
+        header = createHeaderMerger().getMergedHeader();
    }

    /**
-     * Load up a sam file.
+     * Load a SAM/BAM, given an input file.
     *
     * @param samFile the file name
-     * @return a SAMFileReader for the file
+     * @return a SAMFileReader for the file, null if we're attempting to read a list
     */
    private SAMFileReader initializeSAMFile(final File samFile) {
        if (samFile.toString().endsWith(".list")) {
@ -115,7 +111,7 @@ public class SAMDataSource implements SimpleDataSource {

    /**
     * <p>
-     * seek
+     * seekLocus
     * </p>
     *
     * @param location the genome location to extract data for
@ -123,17 +119,16 @@ public class SAMDataSource implements SimpleDataSource {
     */
    public StingSAMIterator seekLocus(GenomeLoc location) throws SimpleDataSourceLoadException {

-        // right now this is pretty damn heavy, it copies the file list into a reader list every time
+        // right now this is very heavy, it copies the file list into a reader list every time
        SamFileHeaderMerger headerMerger = createHeaderMerger();

        // make a merging iterator for this record
        MergingSamRecordIterator2 iter = new MergingSamRecordIterator2(headerMerger);

-        // we do different things for locus and read modes
        iter.queryOverlapping(location.getContig(), (int) location.getStart(), (int) location.getStop() + 1);

        // return the iterator
-        return StingSAMIteratorAdapter.adapt( reads, iter );
+        return StingSAMIteratorAdapter.adapt(reads, iter);
    }

    /**
@ -149,17 +144,17 @@ public class SAMDataSource implements SimpleDataSource {
        if (shard.getShardType() == Shard.ShardType.READ) {
            iterator = seekRead((ReadShard) shard);
            iterator = TraversalEngine.applyDecoratingIterators(true,
-                                                                iterator,
-                                                                reads.getDownsamplingFraction(),
-                                                                reads.getMaxOnTheFlySorts(),
-                                                                reads.getSafetyChecking());
+                    iterator,
+                    reads.getDownsamplingFraction(),
+                    reads.getMaxOnTheFlySorts(),
+                    reads.getSafetyChecking());
        } else if (shard.getShardType() == Shard.ShardType.LOCUS) {
            iterator = seekLocus(shard.getGenomeLoc());
            iterator = TraversalEngine.applyDecoratingIterators(false,
-                                                                iterator,
-                                                                reads.getDownsamplingFraction(),
-                                                                reads.getMaxOnTheFlySorts(),
-                                                                reads.getSafetyChecking());
+                    iterator,
+                    reads.getDownsamplingFraction(),
+                    reads.getMaxOnTheFlySorts(),
+                    reads.getSafetyChecking());
        } else {
            throw new StingException("seek: Unknown shard type");
        }
@ -168,26 +163,26 @@ public class SAMDataSource implements SimpleDataSource {
    }


-    /**
-     * If we're in by-read mode, this indicates if we want
-     * to see unmapped reads too.  Only seeing mapped reads
-     * is much faster, but most BAM files have significant
-     * unmapped read counts.
-     *
-     * @param seeUnMappedReads true to see unmapped reads, false otherwise
-     */
-    public void viewUnmappedReads(boolean seeUnMappedReads) {
-        includeUnmappedReads = seeUnMappedReads;
-    }
-
    /**
     * Gets the (potentially merged) SAM file header.
+     *
     * @return SAM file header.
     */
    public SAMFileHeader getHeader() {
-        return header; 
+        return header;
    }

+    /**
+     * create the merging header.
+     *
+     * @return a SamFileHeaderMerger that includes the set of SAM files we were created with
+     */
+    private SamFileHeaderMerger createHeaderMerger() {
+        List<SAMFileReader> lst = GetReaderList();
+        return new SamFileHeaderMerger(lst, SORT_ORDER);
+    }
+
+
    /**
     * <p>
     * seek
@ -203,10 +198,8 @@ public class SAMDataSource implements SimpleDataSource {
        MergingSamRecordIterator2 iter = null;

        if (!intoUnmappedReads) {
-            // make a merging iterator for this record
            iter = new MergingSamRecordIterator2(headerMerger);
-
-           bound = fastMappedReadSeek(shard.getSize(), iter);
+            bound = fastMappedReadSeek(shard.getSize(), iter);
        }
        if ((bound == null || intoUnmappedReads) && includeUnmappedReads) {
            if (iter != null) {
@ -218,18 +211,21 @@ public class SAMDataSource implements SimpleDataSource {

        if (bound == null) {
            shard.signalDone();
-            bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), 0);
+            bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), 0);
        }
        return bound;
    }

-    private SamFileHeaderMerger createHeaderMerger() {
-        // TODO: make extremely less horrible
-        List<SAMFileReader> lst = GetReaderList();
-
-        // now merge the headers
-        SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(lst, SORT_ORDER);
-        return headerMerger;
+    /**
+     * If we're in by-read mode, this indicates if we want
+     * to see unmapped reads too.  Only seeing mapped reads
+     * is much faster, but most BAM files have significant
+     * unmapped read counts.
+     *
+     * @param seeUnMappedReads true to see unmapped reads, false otherwise
+     */
+    public void viewUnmappedReads(boolean seeUnMappedReads) {
+        includeUnmappedReads = seeUnMappedReads;
    }

    /**
@ -242,7 +238,6 @@ public class SAMDataSource implements SimpleDataSource {
     * @throws SimpleDataSourceLoadException
     */
    private BoundedReadIterator toUnmappedReads(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException {
-        BoundedReadIterator bound;// is this the first time we're doing this?
        int count = 0;
        SAMRecord d = null;
        while (iter.hasNext()) {
@ -270,15 +265,15 @@ public class SAMDataSource implements SimpleDataSource {
            return null;
        }

-        // we're good, increment our read cout
+        // we're not out of unmapped reads, so increment our read count
        this.readsTaken += readCount;
-        return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
+        return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);

    }


    /**
-     * unmapped reads.
+     * A seek function for mapped reads.
     *
     * @param readCount how many reads to retrieve
     * @param iter      the iterator to use
@ -286,16 +281,10 @@ public class SAMDataSource implements SimpleDataSource {
     * @throws SimpleDataSourceLoadException
     */
    private BoundedReadIterator fastMappedReadSeek(long readCount, MergingSamRecordIterator2 iter) throws SimpleDataSourceLoadException {
-        BoundedReadIterator bound;// is this the first time we're doing this?
        if (lastReadPos == null) {
-            lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
-            iter.queryContained(lastReadPos.getContig(), 1, -1);
-            bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
-            this.readsTaken = readCount;
-        }
-        // we're not at the beginning, not at the end, so we move forward with our ghastly plan...
-        else {
-
+            return InitialReadIterator(readCount, iter);
+        } else {
+            BoundedReadIterator bound;
            iter.queryContained(lastReadPos.getContig(), (int) lastReadPos.getStop(), -1);

            // move the number of reads we read from the last pos
@ -338,7 +327,7 @@ public class SAMDataSource implements SimpleDataSource {
                        SamFileHeaderMerger mg = createHeaderMerger();
                        iter = new MergingSamRecordIterator2(mg);
                        iter.queryContained(lastReadPos.getContig(), 1, Integer.MAX_VALUE);
-                        return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter),readCount);
+                        return new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
                    }
                }
            }
@ -363,11 +352,28 @@ public class SAMDataSource implements SimpleDataSource {
                throw new StingException("Danger: weve run out reads in fastMappedReadSeek");
                //return null;
            }
-            bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads,iter), readCount);
+            bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
+
+            // return the iterator
+            return bound;
        }


-        // return the iterator
+    }
+
+    /**
+     * set the initial iterator
+     *
+     * @param readCount the number of reads
+     * @param iter      the merging iterator
+     * @return a bounded read iterator at the first read position in the file.
+     */
+    private BoundedReadIterator InitialReadIterator(long readCount, MergingSamRecordIterator2 iter) {
+        BoundedReadIterator bound;
+        lastReadPos = new GenomeLoc(iter.getHeader().getSequenceDictionary().getSequence(0).getSequenceIndex(), 0, 0);
+        iter.queryContained(lastReadPos.getContig(), 1, -1);
+        bound = new BoundedReadIterator(StingSAMIteratorAdapter.adapt(reads, iter), readCount);
+        this.readsTaken = readCount;
        return bound;
    }

--- a/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
 import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.threading.ThreadPoolMonitor;

 import java.io.File;
@ -61,7 +62,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Reduce
        this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
    }

-    public Object execute( Walker walker, List<GenomeLoc> intervals ) {
+    public Object execute( Walker walker, GenomeLocSortedSet intervals ) {
        // Fast fail for walkers not supporting TreeReducible interface.
        if( !(walker instanceof TreeReducible) )
            throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
--- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
 import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
 import org.broadinstitute.sting.gatk.Reads;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.io.File;
 import java.util.List;
@ -31,7 +32,7 @@ public class LinearMicroScheduler extends MicroScheduler {
     * @param walker    Computation to perform over dataset.
     * @param locations Subset of the dataset over which to walk.
     */
-    public Object execute(Walker walker, List<GenomeLoc> locations) {
+    public Object execute(Walker walker, GenomeLocSortedSet locations) {
        ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations);

        walker.initialize();
--- a/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@ -20,6 +20,7 @@ import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
 import org.broadinstitute.sting.gatk.Reads;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;

 import java.io.File;
@ -101,7 +102,7 @@ public abstract class MicroScheduler {
     * @param intervals A list of intervals over which to walk.  Null for whole dataset.
     * @return the return type of the walker
     */
-    public abstract Object execute( Walker walker, List<GenomeLoc> intervals);
+    public abstract Object execute( Walker walker, GenomeLocSortedSet intervals);

    /**
     * Get the sharding strategy given a driving data source.
@ -110,7 +111,7 @@ public abstract class MicroScheduler {
     * @param intervals Intervals to use when limiting sharding.
     * @return Sharding strategy for this driving data source.
     */
-    protected ShardStrategy getShardStrategy( Walker walker, ReferenceSequenceFile drivingDataSource, List<GenomeLoc> intervals ) {
+    protected ShardStrategy getShardStrategy( Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals ) {
        ShardStrategy shardStrategy = null;

        if( walker instanceof LocusWalker ) {
--- a/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@ -479,7 +479,7 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Cloneable {
     * @return A GenomeLoc with the same contents as the current loc.
     */
    @Override
-    public Object clone() {
+    public GenomeLoc clone() {
        return new GenomeLoc(this);
    }

--- a/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
+++ b/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
@ -6,6 +6,7 @@ import net.sf.samtools.SAMSequenceRecord;
 import java.util.AbstractSet;
 import java.util.ArrayList;
 import java.util.Iterator;
+import java.util.List;

 /**
 *
@ -26,22 +27,22 @@ import java.util.Iterator;

 /**
 * @author aaron
- * @version 1.0
- * @date May 22, 2009
- * <p/>
- * Class GenomeLocCollection
- * <p/>
- * a set of genome locations. This collection is self sorting,
- * and will merge genome locations that are overlapping. The remove function
- * will also remove a region from the list, if the region to remove is a
- * partial interval of a region in the collection it will remove the region from
- * that element.
+ *         <p/>
+ *         Class GenomeLocSortedSet
+ *         <p/>
+ *         a set of genome locations. This collection is self sorting,
+ *         and will merge genome locations that are overlapping. The remove function
+ *         will also remove a region from the list, if the region to remove is a
+ *         partial interval of a region in the collection it will remove the region from
+ *         that element.
 */
 public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
    // our private storage for the GenomeLoc's
    private final ArrayList<GenomeLoc> mArray = new ArrayList<GenomeLoc>();

-    public GenomeLocSortedSet() {}
+    /** default constructor */
+    public GenomeLocSortedSet() {
+    }

    /**
     * get an iterator over this collection
@ -72,7 +73,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {

    /**
     * add a genomeLoc to the collection, simply inserting in order into the set
+     *
     * @param e the GenomeLoc to add
+     *
     * @return true
     */
    public boolean add(GenomeLoc e) {
@ -82,7 +85,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
        int index = 0;
        while (index < mArray.size()) {
            if (!e.isPast(mArray.get(index))) {
-                mArray.add(index,e);
+                mArray.add(index, e);
                return true;
            }
            ++index;
@ -96,6 +99,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
     * If it's not overlapping then we add it in sorted order.
     *
     * @param e the GenomeLoc to add to the collection
+     *
     * @return true, if the GenomeLoc could be added to the collection
     */
    public boolean addRegion(GenomeLoc e) {
@ -112,7 +116,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
        for (GenomeLoc g : mArray) {
            if (g.contiguousP(e)) {
                GenomeLoc c = g.merge(e);
-                mArray.set(mArray.indexOf(g),c);
+                mArray.set(mArray.indexOf(g), c);
                haveAdded = true;
            } else if ((g.getContigIndex() == e.getContigIndex()) &&
                    (e.getStart() < g.getStart()) && !haveAdded) {
@ -132,7 +136,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
    /**
     * remove an element from the set.  Given a specific genome location, this function will
     * remove all regions in the element set that overlap the specified region.
+     *
     * @param e the genomic range to remove
+     *
     * @return true if a removal action was performed, false if the collection was unchanged.
     */
    public boolean removeRegion(GenomeLoc e) {
@ -148,7 +154,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
         */
        for (GenomeLoc g : mArray) {
            if (g.overlapsP(e)) {
-                if (g.compareTo(e) == 0) {
+                if (g.equals(e)) {
                    mArray.remove(mArray.indexOf(g));
                    return true;
                } else if (g.containsP(e)) {
@ -162,11 +168,15 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
                     * |------|  + |--------|
                     *
                     */
-                    GenomeLoc before = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart()-1);
+                    GenomeLoc before = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
                    GenomeLoc after = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
                    int index = mArray.indexOf(g);
-                    mArray.add(index, after);
-                    mArray.add(index, before);
+                    if (after.getStop() - after.getStart() > 0) {
+                        mArray.add(index, after);
+                    }
+                    if (before.getStop() - before.getStart() > 0) {
+                        mArray.add(index, before);
+                    }
                    mArray.remove(mArray.indexOf(g));
                    return true;
                } else if (e.containsP(g)) {
@ -194,12 +204,12 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
                     *       |------------- g ----------|
                     * |------------ e -----------|
                     *
-                      */
+                     */

                    if (e.getStart() < g.getStart()) {
-                        l = new GenomeLoc(g.getContigIndex(), e.getStop()+1, g.getStop());
+                        l = new GenomeLoc(g.getContigIndex(), e.getStop() + 1, g.getStop());
                    } else {
-                        l = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart()-1);
+                        l = new GenomeLoc(g.getContigIndex(), g.getStart(), e.getStart() - 1);
                    }
                    // replace g with the new region
                    mArray.set(mArray.indexOf(g), l);
@ -212,14 +222,45 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {

    /**
     * create a list of genomic locations, given a reference sequence
+     *
     * @param dict the sequence dictionary to create a collection from
+     *
     * @return the GenomeLocSet of all references sequences as GenomeLoc's
     */
    public static GenomeLocSortedSet createSetFromSequenceDictionary(SAMSequenceDictionary dict) {
        GenomeLocSortedSet returnSortedSet = new GenomeLocSortedSet();
        for (SAMSequenceRecord record : dict.getSequences()) {
-            returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(),1,record.getSequenceLength()));
+            returnSortedSet.add(new GenomeLoc(record.getSequenceIndex(), 1, record.getSequenceLength()));
        }
        return returnSortedSet;
    }
+
+    /**
+     * Create a sorted genome location set from a list of GenomeLocs.
+     * @param locs the list<GenomeLoc>
+     * @return the sorted genome loc list
+     */
+    public static GenomeLocSortedSet createSetFromList(List<GenomeLoc> locs) {
+        GenomeLocSortedSet set = new GenomeLocSortedSet();
+        for (GenomeLoc l: locs) {
+            set.add(l);
+        }
+        return set;
+    }
+
+
+    /**
+     * return a deep copy of this collection.
+     *
+     * @return a new GenomeLocSortedSet, identical to the current GenomeLocSortedSet.
+     */
+    public GenomeLocSortedSet clone() {
+        GenomeLocSortedSet ret = new GenomeLocSortedSet();
+        for (GenomeLoc loc : this.mArray) {
+            // ensure a deep copy
+            ret.mArray.add(new GenomeLoc(loc.getContigIndex(), loc.getStart(), loc.getStop()));
+        }
+        return ret;
+    }
+
 }
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShardTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalReadShardTest.java
@ -0,0 +1,73 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertTrue;
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *         <p/>
+ *         Class IntervalReadShardTest
+ *         <p/>
+ *         Tests for the IntervalReadShard class.
+ */
+public class IntervalReadShardTest extends BaseTest {
+
+    private IntervalReadShard shard = null;
+    private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
+    private static final int NUMBER_OF_CHROMOSOMES = 5;
+    private static final int STARTING_CHROMOSOME = 1;
+    private static final int CHROMOSOME_SIZE = 1000;
+
+    @Before
+    public void setup() {
+        GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
+    }
+
+
+    @Test
+    public void simpleReturn() {
+        GenomeLoc loc = new GenomeLoc(1, 1, 100);
+        shard = new IntervalReadShard(loc);
+        assertTrue(shard.getGenomeLoc().equals(loc));
+    }
+
+    @Test
+    public void ensureNotReference() {
+        GenomeLoc loc = new GenomeLoc(1, 1, 100);
+        shard = new IntervalReadShard(loc);
+        assertTrue(shard.getGenomeLoc() != loc && shard.getGenomeLoc().equals(loc));
+    }
+
+}
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/IntervalShardStrategyTest.java
@ -1,142 +0,0 @@
-package org.broadinstitute.sting.gatk.dataSources.shards;
-
-import static junit.framework.Assert.assertEquals;
-import static junit.framework.Assert.fail;
-import net.sf.samtools.SAMSequenceDictionary;
-import net.sf.samtools.SAMSequenceRecord;
-import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
-import org.junit.*;
-
-import java.io.File;
-import java.util.ArrayList;
-
-/**
- *
- * User: aaron
- * Date: May 14, 2009
- * Time: 3:52:57 PM
- *
- * The Broad Institute
- * SOFTWARE COPYRIGHT NOTICE AGREEMENT 
- * This software and its documentation are copyright 2009 by the
- * Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
- *
- * This software is supplied without any warranty or guaranteed support whatsoever. Neither
- * the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
- *
- */
-
-
-/**
- * @author aaron
- * @version 1.0
- * @date May 14, 2009
- * <p/>
- * Class LocusWindowShardStrategyTest
- * <p/>
- * LocusWindowShardStrategy tests
- */
-public class IntervalShardStrategyTest extends BaseTest {
-
-    private static FastaSequenceFile2 seq;
-
-    /**
-     * This function (because of the @BeforeClass tag) gets called only once ever,
-     * before any tests are run
-     */
-    @BeforeClass
-    public static void doBeforeAnyTests() {
-        seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
-    }
-
-    /**
-     * Tears down the test fixture after each call.
-     * <p/>
-     * Called after every test case method.
-     */
-    @AfterClass
-    public static void doAfterAllTests() {
-
-    }
-
-    /**
-     * This function does the setup of our parser, before each method call.
-     * <p/>
-     * Called before every test case method.
-     */
-    @Before
-    public void doForEachTest() {
-
-    }
-
-    /**
-     * Tears down the test fixture after each call.
-     * <p/>
-     * Called after every test case method.
-     */
-    @After
-    public void undoForEachTest() {
-
-    }
-
-    /** Tests that we got a string parameter in correctly */
-    @Test
-    public void testIntervalGenomeCycle() throws InterruptedException {
-        logger.warn("Executing testIntervalGenomeCycle");
-
-        SAMSequenceDictionary dic = seq.getSequenceDictionary();
-
-
-        // setup a list of genome locs that represent the whole file
-        SAMSequenceRecord s = dic.getSequence(1);
-        int stop = s.getSequenceLength();
-        int size = 10000;
-        int location = 1;
-
-        GenomeLoc.setupRefContigOrdering(dic);
-        // keep track of the number of genome locs we build
-        int genomeLocs = 0;
-        ArrayList<GenomeLoc> locations = new ArrayList<GenomeLoc>();
-        try {
-            while (location + size < stop) {
-                // lets make up some fake locations
-                GenomeLoc gl = new GenomeLoc(s.getSequenceName(), location, location + size - 1);
-                logger.debug("loc = " + location);
-
-                // let's move the location up, with a size space
-                location += (size * 2);
-
-                // add our current location to the list
-                locations.add(gl);
-
-                // add another genome location
-                ++genomeLocs;
-            }
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        logger.debug("Location count = " + genomeLocs);
-        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.INTERVAL, seq.getSequenceDictionary(), 0, locations);
-        int shardCount = 0;
-        try {
-            for (Shard sh : strategy) {
-                GenomeLoc l = sh.getGenomeLoc();
-                GenomeLoc truth = locations.get(shardCount);
-                if (l.compareTo(truth) != 0) {
-                    String truthStr = truth.getContig() + ":" + truth.getStart() + ":" + truth.getStop();
-                    String lStr = l.getContig() + ":" + l.getStart() + ":" + l.getStop();
-                    fail("Genome loc " + truthStr + " doesn't equal " + lStr);
-                }
-                shardCount++;
-            }
-            assertEquals(shardCount, genomeLocs);
-
-        } catch (Exception e) {
-            e.printStackTrace();
-            fail("testIntervalGenomeCycle: ne exception expected");
-        }
-    }
-
-}
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/LocusIntervalShardStrategyTest.java
@ -0,0 +1,79 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
+import org.broadinstitute.sting.BaseTest;
+import org.junit.Before;
+import org.junit.Test;
+import static org.junit.Assert.assertTrue;
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *         <p/>
+ *         Class LocusIntervalShardStrategyTest
+ *         <p/>
+ *         Tests the LocusIntervalShardStrategy class.
+ */
+public class LocusIntervalShardStrategyTest extends BaseTest {
+    private GenomeLocSortedSet mSortedSet = null;
+    private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
+    private static final int NUMBER_OF_CHROMOSOMES = 5;
+    private static final int STARTING_CHROMOSOME = 1;
+    private static final int CHROMOSOME_SIZE = 1000;
+    private LocusIntervalShardStrategy strat = null;
+
+    @Before
+    public void setup() {
+        GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
+        mSortedSet = new GenomeLocSortedSet();
+    }
+
+    @Test
+    public void testOneToOneness() {
+        for (int x = 0; x < 100; x++) {
+            GenomeLoc loc = new GenomeLoc(0,(x*10)+1, (x*10)+8);
+            mSortedSet.add(loc);
+        }
+        strat = new LocusIntervalShardStrategy(header.getSequenceDictionary(),mSortedSet);
+        int counter = 0;
+        while (strat.hasNext()) {
+            ++counter;
+            GenomeLoc loc = strat.next().getGenomeLoc();
+            long stop = loc.getStop();
+            long start = loc.getStart();
+            long length =  stop - start;
+            assertTrue(length == 7);
+        }
+        assertTrue(counter == 100);
+
+    }
+
+}
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategyTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ReadIntervalShardStrategyTest.java
@ -0,0 +1,124 @@
+package org.broadinstitute.sting.gatk.dataSources.shards;
+
+import org.junit.Test;
+import org.junit.Before;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
+import org.broadinstitute.sting.BaseTest;
+import net.sf.samtools.SAMFileHeader;
+
+
+/*
+ * Copyright (c) 2009 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * @author aaron
+ *         <p/>
+ *         Class ReadIntervalShardStrategyTest
+ *         <p/>
+ *         Tests the ReadIntervalShardStrategy class
+ */
+public class ReadIntervalShardStrategyTest extends BaseTest {
+
+    private GenomeLocSortedSet mSortedSet = null;
+    private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
+    private static final int NUMBER_OF_CHROMOSOMES = 5;
+    private static final int STARTING_CHROMOSOME = 1;
+    private static final int CHROMOSOME_SIZE = 1000;
+
+    @Before
+    public void setup() {
+        GenomeLoc.setupRefContigOrdering(header.getSequenceDictionary());
+        mSortedSet = new GenomeLocSortedSet();
+    }
+
+    @Test(expected = StingException.class)
+    public void testExceptionOnEmpty() {
+        ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+    }
+
+    @Test
+    public void testSingleChromosomeFunctionality() {
+        GenomeLoc loc = new GenomeLoc(1, 1, 1000);
+        mSortedSet.add(loc);
+        ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+        int counter = 0;
+        while (strat.hasNext()) {
+            Shard d = strat.next();
+            counter++;
+        }
+        assertEquals(10, counter);
+    }
+
+    @Test
+    public void testMultipleChromosomeFunctionality() {
+        for (int x = 0; x < 5; x++) {
+            GenomeLoc loc = new GenomeLoc(x, 1, 1000);
+            mSortedSet.add(loc);
+        }
+        ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+        int counter = 0;
+        while (strat.hasNext()) {
+            Shard d = strat.next();
+            counter++;
+        }
+        assertEquals(50, counter);
+    }
+
+    @Test
+    public void testOddSizeShardFunctionality() {
+        for (int x = 0; x < 5; x++) {
+            GenomeLoc loc = new GenomeLoc(x, 1, 1000);
+            mSortedSet.add(loc);
+        }
+        ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 789, mSortedSet);
+        int counter = 0;
+        while (strat.hasNext()) {
+            Shard d = strat.next();
+            if (counter % 2 == 0) {
+                assertEquals(1, d.getGenomeLoc().getStart());
+                assertEquals(789, d.getGenomeLoc().getStop());
+            } else {
+                assertEquals(790, d.getGenomeLoc().getStart());
+                assertEquals(1000, d.getGenomeLoc().getStop());
+            }
+            counter++;
+        }
+        assertEquals(10, counter);
+    }
+
+    @Test(expected = UnsupportedOperationException.class)
+    public void testRemove() {
+        GenomeLoc loc = new GenomeLoc(1, 1, 1000);
+        mSortedSet.add(loc);
+        ReadIntervalShardStrategy strat = new ReadIntervalShardStrategy(header.getSequenceDictionary(), 100, mSortedSet);
+        strat.remove();
+    }
+
+}
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/shards/ShardStrategyFactoryTest.java
@ -4,10 +4,14 @@ import static junit.framework.Assert.assertEquals;
 import static junit.framework.Assert.fail;
 import net.sf.samtools.SAMSequenceDictionary;
 import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.SAMFileHeader;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.sam.ArtificialSamUtils;
 import org.broadinstitute.sting.utils.fasta.FastaSequenceFile2;
 import org.junit.*;
+import static org.junit.Assert.assertTrue;

 import java.io.File;
 import java.util.ArrayList;
@ -32,139 +36,62 @@ import java.util.ArrayList;
 /**
 * @author aaron
 * @version 1.0
- * @date Apr 8, 2009
- * <p/>
- * Class ShardFactoryTest
- * <p/>
- * Tests the shard strategy factory.  This tests the whole sharding interface, and should be
- * split in the future into seperate test cases.
- * TODO: split out for the seperate sharding classes
 */
 public class ShardStrategyFactoryTest extends BaseTest {

-    private static FastaSequenceFile2 seq;
+    // Arguments passed to ArtificialSamUtils.createArtificialSamHeader for the fixture header.
+    private static final int NUMBER_OF_CHROMOSOMES = 5;
+    private static final int STARTING_CHROMOSOME = 1;
+    private static final int CHROMOSOME_SIZE = 1000;
+
+    // Artificial SAM header; its sequence dictionary is what every test hands to the factory.
+    private SAMFileHeader header = ArtificialSamUtils.createArtificialSamHeader(NUMBER_OF_CHROMOSOMES, STARTING_CHROMOSOME, CHROMOSOME_SIZE);
+
+    // Interval set used by the interval tests; replaced with a fresh instance in setup().
+    private GenomeLocSortedSet set = null;

-    /**
-     * This function (because of the @BeforeClass tag) gets called only once ever,
-     * before any tests are run
-     */
-    @BeforeClass
-    public static void doBeforeAnyTests() {
-        seq = new FastaSequenceFile2(new File(seqLocation + "/references/Homo_sapiens_assembly18/v0/Homo_sapiens_assembly18.fasta"));
-    }

-    /**
-     * Tears down the test fixture after each call.
-     * <p/>
-     * Called after every test case method.
-     */
-    @AfterClass
-    public static void doAfterAllTests() {
-
-    }
-
-    /**
-     * This function does the setup of our parser, before each method call.
-     * <p/>
-     * Called before every test case method.
-     */
    @Before
-    public void doForEachTest() {
-
+    public void setup() {
+        // Pass the fixture's sequence dictionary to GenomeLoc, then start each test with a new, empty set.
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        GenomeLoc.setupRefContigOrdering(dictionary);
+        set = new GenomeLocSortedSet();
    }

-    /**
-     * Tears down the test fixture after each call.
-     * <p/>
-     * Called after every test case method.
-     */
-    @After
-    public void undoForEachTest() {
-
-    }
-
-    /** Tests that we got a string parameter in correctly */
    @Test
-    public void testFullGenomeCycle() {
-        logger.warn("Executing testFullGenomeCycle");
-
-        GenomeLoc.setupRefContigOrdering(seq.getSequenceDictionary());
-
-        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 100000);
-        int shardCount = 0;
-        try {
-
-            for (Shard s : strategy) {
-                GenomeLoc l = s.getGenomeLoc();
-                //logger.debug("Shard start: " + l.getStart() + " stop " + l.getStop() + " contig " + l.getContig());
-                shardCount++;
-            }
-
-            // check to make sure we got apple shards
-            //logger.debug("shardCount : " + shardCount + " seq size = " + seq.getSequenceDictionary().size());
-
-        } catch (Exception e) {
-            e.printStackTrace();
-            fail("We Shouldn't of seen an exception! : " + e.getMessage() + "; shard count " + shardCount);
-        }
+    public void testReadNonInterval() {
+        // READS strategy requested without an interval set: the factory should return a ReadShardStrategy.
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, dictionary, 100);
+        assertTrue(strategy instanceof ReadShardStrategy);
    }

-
-    /** Tests that we got a string parameter in correctly */
    @Test
-    public void testIntervalGenomeCycle() throws InterruptedException {
-        logger.warn("Executing testIntervalGenomeCycle");
-
-        SAMSequenceDictionary dic = seq.getSequenceDictionary();
-        SAMSequenceRecord s = dic.getSequence(1);
-        // Character stream writing
-
-       
-        int stop = s.getSequenceLength();
-        int size = 10000;
-        int location = 1;
-        GenomeLoc.setupRefContigOrdering(dic);
-        // keep track of the number of genome locs we build
-        int genomeLocs = 0;
-        ArrayList<GenomeLoc> locations = new ArrayList<GenomeLoc>();
-
-        try {
-            while (location + size < stop) {
-            logger.debug("s = " + s.getSequenceName() + " " + location + " " + size);
-            // lets make up some fake locations
-            GenomeLoc gl = new GenomeLoc(s.getSequenceName(), location, location + size - 1);
-            logger.debug("loc = " + location);
-
-            // let's move the location up, with a size space
-            location += (size * 2);
-
-            // add our current location to the list
-            locations.add(gl);
-
-            // add another genome location
-            ++genomeLocs;
-        }
-        } catch (Exception e) {
-            e.printStackTrace();
-        }
-        logger.debug("Location count = " + genomeLocs);
-        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, seq.getSequenceDictionary(), 5000, locations);
-        int shardCount = 0;
-        try {
-            for (Shard sh : strategy) {
-                GenomeLoc l = sh.getGenomeLoc();
-
-                logger.debug("Shard start: " + l.getStart() + " stop " + l.getStop() + " contig " + l.getContig());
-                shardCount++;
-            }
-
-             logger.debug("Shard count = " + shardCount); 
-            assertEquals(shardCount, genomeLocs * 2);
-
-        } catch (Exception e) {
-            e.printStackTrace();
-            fail("testIntervalGenomeCycle: ne exception expected");
-        }
+    public void testReadInterval() {
+        // READS strategy requested with an interval set: the factory should return a ReadIntervalShardStrategy.
+        set.add(new GenomeLoc(0, 1, 100));
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS, dictionary, 100, set);
+        assertTrue(strategy instanceof ReadIntervalShardStrategy);
    }

+    /** LINEAR strategy requested without intervals should produce a LinearLocusShardStrategy. */
+    @Test
+    public void testLinearNonInterval() {
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, dictionary, 100);
+        assertTrue(strategy instanceof LinearLocusShardStrategy);
+    }
+
+     @Test
+    public void testExpNonInterval() {
+        // EXPONENTIAL strategy requested without intervals should produce an ExpGrowthLocusShardStrategy.
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL, dictionary, 100);
+        assertTrue(strategy instanceof ExpGrowthLocusShardStrategy);
+    }
+
+    /** EXPONENTIAL strategy requested with an interval set should still produce an ExpGrowthLocusShardStrategy. */
+    @Test
+    public void testExpInterval() {
+        set.add(new GenomeLoc(0, 1, 100));
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.EXPONENTIAL, dictionary, 100, set);
+        assertTrue(strategy instanceof ExpGrowthLocusShardStrategy);
+    }
+
+    /** LINEAR strategy requested with an interval set should still produce a LinearLocusShardStrategy. */
+    @Test
+    public void testLinearInterval() {
+        set.add(new GenomeLoc(0, 1, 100));
+        SAMSequenceDictionary dictionary = header.getSequenceDictionary();
+        ShardStrategy strategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR, dictionary, 100, set);
+        assertTrue(strategy instanceof LinearLocusShardStrategy);
+    }
+    
 }
--- a/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/dataSources/simpleDataSources/SAMByReadsTest.java
@ -75,7 +75,7 @@ public class SAMByReadsTest extends BaseTest {

        final int targetReadCount = 5000;
        
-        ShardStrategy shardStrategy = ShardStrategyFactory.shatterByReadCount(seq.getSequenceDictionary(),targetReadCount);
+        ShardStrategy shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.READS,seq.getSequenceDictionary(),targetReadCount);
        
        try {
            SAMDataSource data = new SAMDataSource(reads);
--- a/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java
+++ b/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetTest.java
@ -29,7 +29,6 @@ import java.util.Iterator;
 /**
 * @author aaron
 * @version 1.0
- * @date May 22, 2009
 * <p/>
 * Class GenomeLocSetTest
 * <p/>
@ -142,6 +141,32 @@ public class GenomeLocSortedSetTest extends BaseTest {
        assertTrue(loc.getContigIndex() == 1);
    }

+    /**
+     * Adds the interval 1-100 on contig index 1, then removes every position 1..100
+     * individually; the set must be empty afterwards.
+     */
+    @Test
+    public void deleteAllByRegion() {
+        mSortedSet.add(new GenomeLoc(1, 1, 100));
+        for (int position = 1; position <= 100; position++) {
+            mSortedSet.removeRegion(new GenomeLoc(1, position, position));
+        }
+        assertTrue(mSortedSet.isEmpty());
+    }
+    /**
+     * Adds the interval 1-100 on contig index 1, then removes positions 1..49 individually.
+     * Exactly one interval must remain, covering 50-100.
+     */
+    @Test
+    public void deleteSomeByRegion() {
+        mSortedSet.add(new GenomeLoc(1, 1, 100));
+        for (int position = 1; position <= 49; position++) {
+            mSortedSet.removeRegion(new GenomeLoc(1, position, position));
+        }
+        assertTrue(!mSortedSet.isEmpty());
+        assertTrue(mSortedSet.size() == 1);
+        GenomeLoc remaining = mSortedSet.iterator().next();
+        assertTrue(remaining.getStop() == 100);
+        assertTrue(remaining.getStart() == 50);
+    }
+
    @Test
    public void deleteSuperRegion() {
        GenomeLoc e = new GenomeLoc(1, 10, 20);