Added support for the -M option in traversals.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@935 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
aaron 2009-06-08 15:12:24 +00:00
parent e2ed56dc96
commit a8a2d0eab9
13 changed files with 214 additions and 157 deletions

View File

@ -105,8 +105,8 @@ public class GATKArgumentCollection {
public Boolean walkAllLoci = false;
@Element(required=false)
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of reads to process before exiting", required = false)
public String maximumReads = "-1";
@Argument(fullName = "maximum_reads", shortName = "M", doc = "Maximum number of iterations to process before exiting, the lower bound is zero. Intended only for testing", required = false)
public Integer maximumEngineIterations = -1;
@Element(required=false)
@Argument(fullName = "sort_on_the_fly", shortName = "sort", doc = "Maximum number of reads to sort on the fly", required = false)
@ -215,7 +215,7 @@ public class GATKArgumentCollection {
if (!other.samFiles.equals(this.samFiles)) {
return false;
}
if (!other.maximumReads.equals(this.maximumReads)) {
if (!other.maximumEngineIterations.equals(this.maximumEngineIterations)) {
return false;
}
if (!other.strictnessLevel.equals(this.strictnessLevel)) {

View File

@ -122,7 +122,7 @@ public class GenomeAnalysisEngine {
locs = GenomeLocSortedSet.createSetFromList(locationsList);
// excute the microscheduler, storing the results
walkerReturn = microScheduler.execute(my_walker, locs);
walkerReturn = microScheduler.execute(my_walker, locs, argCollection.maximumEngineIterations);
}
@ -199,7 +199,7 @@ public class GenomeAnalysisEngine {
private void genericEngineSetup(ValidationStringency strictness) {
engine.setStrictness(strictness);
engine.setMaxReads(Integer.parseInt(argCollection.maximumReads));
engine.setMaxReads(argCollection.maximumEngineIterations);
engine.setFilterZeroMappingQualityReads(argCollection.filterZeroMappingQualityReads);
// we default interval files over the genome region string

View File

@ -52,8 +52,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
*
* @param dic the seq dictionary
*/
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
super(dic);
this.limitingFactor = limitByCount;
this.baseSize = startSize;
currentExp = 0;
}
@ -76,8 +77,9 @@ public class ExpGrowthLocusShardStrategy extends LocusShardStrategy {
* @param startSize the starting size of the shard
* @param lst locations to iterate from
*/
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
ExpGrowthLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
super(dic, lst);
this.limitingFactor = limitByCount;
this.baseSize = startSize;
this.currentExp = 0;
}

View File

@ -51,8 +51,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
*
* @param dic the seq dictionary
*/
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize) {
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, long limitByCount) {
super(dic);
this.limitingFactor = limitByCount;
this.nextShardSize = startSize;
}
@ -73,8 +74,9 @@ class LinearLocusShardStrategy extends LocusShardStrategy {
* @param startSize the starting size of the shard
* @param lst locations to iterate from
*/
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst) {
LinearLocusShardStrategy(SAMSequenceDictionary dic, long startSize, GenomeLocSortedSet lst, long limitByCount) {
super(dic, lst);
this.limitingFactor = limitByCount;
this.nextShardSize = startSize;
}

View File

@ -51,14 +51,18 @@ public abstract class LocusShardStrategy implements ShardStrategy {
/** our log, which we want to capture anything from this class */
private static Logger logger = Logger.getLogger(LocusShardStrategy.class);
/** the number of iterations before we stop */
protected long limitingFactor = -1;
private boolean stopDueToLimitingFactor = false;
/**
* the constructor, taking a seq dictionary to parse out contigs
*
* @param dic the seq dictionary
*/
LocusShardStrategy(SAMSequenceDictionary dic) {
LocusShardStrategy( SAMSequenceDictionary dic ) {
this.dic = dic;
limitingFactor = -1;
mLoc = new GenomeLoc(0, 0, 0);
if (dic.getSequences().size() > 0) {
nextContig = true;
@ -70,12 +74,13 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @param old the old strategy
*/
LocusShardStrategy(LocusShardStrategy old) {
LocusShardStrategy( LocusShardStrategy old ) {
this.dic = old.dic;
this.mLoc = old.mLoc;
this.seqLoc = old.seqLoc;
this.lastGenomeLocSize = old.lastGenomeLocSize;
this.nextContig = old.nextContig;
this.limitingFactor = old.limitingFactor;
}
@ -85,7 +90,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
* @param dic the seq dictionary
* @param intervals file
*/
LocusShardStrategy(SAMSequenceDictionary dic, GenomeLocSortedSet intervals) {
LocusShardStrategy( SAMSequenceDictionary dic, GenomeLocSortedSet intervals ) {
this.dic = dic;
this.intervals = intervals.clone();
// set the starting point to the beginning interval
@ -132,6 +137,15 @@ public abstract class LocusShardStrategy implements ShardStrategy {
long proposedSize = nextShardSize();
long nextStart = mLoc.getStop() + 1;
if (this.limitingFactor > 0) {
if (proposedSize < limitingFactor) {
limitingFactor = limitingFactor - proposedSize;
} else {
proposedSize = limitingFactor;
this.stopDueToLimitingFactor = true;
}
}
// if we don't have an interval set, use the non interval based approach. Simple, eh?
if (this.intervals == null) {
return nonIntervaledNext(length, proposedSize, nextStart);
@ -148,8 +162,8 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @return the shard that represents this data
*/
private Shard intervaledNext(long proposedSize) {
if ((this.intervals == null) || (intervals.isEmpty())) {
private Shard intervaledNext( long proposedSize ) {
if (( this.intervals == null ) || ( intervals.isEmpty() )) {
throw new StingException("LocusShardStrategy: genomic regions list is empty in next() function.");
}
@ -175,12 +189,12 @@ public abstract class LocusShardStrategy implements ShardStrategy {
*
* @return the shard to return to the user
*/
private Shard nonIntervaledNext(long length, long proposedSize, long nextStart) {
private Shard nonIntervaledNext( long length, long proposedSize, long nextStart ) {
// can we fit it into the current seq size?
if (nextStart + proposedSize - 1 < length) {
lastGenomeLocSize = proposedSize;
mLoc = new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1);
return LocusShard.toShard(new GenomeLoc(dic.getSequence(seqLoc).getSequenceIndex(), nextStart, nextStart + proposedSize - 1));
return LocusShard.toShard(mLoc);
}
// else we can't make it in the current location, we have to stitch one together
else {
@ -207,7 +221,7 @@ public abstract class LocusShardStrategy implements ShardStrategy {
private void jumpContig() {
++seqLoc;
if (!(seqLoc < dic.getSequences().size())) {
if (!( seqLoc < dic.getSequences().size() )) {
nextContig = false;
return;
}
@ -223,11 +237,14 @@ public abstract class LocusShardStrategy implements ShardStrategy {
* @return
*/
public boolean hasNext() {
if (this.stopDueToLimitingFactor) {
return false;
}
// if we don't have an interval file, use the non interval based approach.
if (this.intervals == null) {
return nextContig;
} else {
return (this.intervals.size() > 0);
return ( this.intervals.size() > 0 );
}
}

View File

@ -54,14 +54,19 @@ public class ReadShardStrategy implements ShardStrategy {
// our hasnext flag
boolean hasNext = true;
// our limiting factor
long limitedSize = -1;
boolean stopDueToLimitingFactor = false;
/**
* the default constructor
* @param dic the sequence dictionary to use
* @param size the read count to iterate over
*/
ReadShardStrategy(SAMSequenceDictionary dic, long size) {
ReadShardStrategy(SAMSequenceDictionary dic, long size, long limitedSize) {
this.dic = dic;
readCount = size;
readCount = size;
this.limitedSize = limitedSize;
}
/**
@ -69,10 +74,24 @@ public class ReadShardStrategy implements ShardStrategy {
* @return
*/
/**
 * Is there another shard available from this strategy?
 *
 * @return false once the -M iteration limit has been consumed; otherwise the
 *         strategy's normal end-of-data flag.
 */
public boolean hasNext() {
    // The -M limiting factor takes precedence over the normal end-of-data flag.
    return !stopDueToLimitingFactor && hasNext;
}
/**
 * Produce the next read shard, shrinking the final shard so the total number
 * of reads handed out never exceeds the -M limit.
 *
 * @return a ReadShard covering at most readCount reads.
 */
public Shard next() {
    if (limitedSize > 0) {
        boolean budgetCoversFullShard = limitedSize > readCount;
        if (budgetCoversFullShard) {
            // Spend one full shard's worth of the remaining budget.
            limitedSize -= readCount;
        } else {
            // Budget exhausted: emit a final, possibly smaller shard and stop.
            readCount = limitedSize;
            limitedSize = 0;
            stopDueToLimitingFactor = true;
        }
    }
    return new ReadShard((int) readCount, this);
}

View File

@ -50,13 +50,27 @@ public class ShardStrategyFactory {
* @return
*/
/**
 * Get a new shatter strategy with no iteration limit.
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @return the requested shard strategy
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize) {
    // Delegate to the limit-aware overload; a non-positive count disables the -M cap.
    final long noLimit = -1L;
    return ShardStrategyFactory.shatter(strat, dic, startingSize, noLimit);
}
/**
* get a new shatter strategy
*
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return
*/
/**
 * Get a new shatter strategy, limiting the traversal to at most limitByCount
 * iterations (the -M command line option).
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @param limitByCount maximum iteration count; non-positive means unlimited
 * @return the requested shard strategy
 * @throws StingException if the strategy is INTERVAL (incompatible with -M) or unimplemented
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, long limitByCount) {
    switch (strat) {
        case LINEAR:
            return new LinearLocusShardStrategy(dic, startingSize, limitByCount);
        case EXPONENTIAL:
            return new ExpGrowthLocusShardStrategy(dic, startingSize, limitByCount);
        case READS:
            return new ReadShardStrategy(dic, startingSize, limitByCount);
        case INTERVAL:
            // fixed typo: "trategy" -> "strategy" in the user-facing message
            throw new StingException("Requested strategy: " + strat + " doesn't work with the limiting count (-M) command line option");
        default:
            throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
    }
@ -73,11 +87,24 @@ public class ShardStrategyFactory {
* @return
*/
/**
 * Get a new interval-aware shatter strategy with no iteration limit.
 *
 * @param strat        what's our strategy - SHATTER_STRATEGY type
 * @param dic          the seq dictionary
 * @param startingSize the starting size
 * @param lst          locations to iterate from
 * @return the requested shard strategy
 */
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst) {
    // Use the uppercase L suffix: the original "-1l" reads as "-11" at a glance.
    return ShardStrategyFactory.shatter(strat, dic, startingSize, lst, -1L);
}
/**
* get a new shatter strategy
*
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return
*/
static public ShardStrategy shatter(SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocSortedSet lst, long limitDataCount) {
switch (strat) {
case LINEAR:
return new LinearLocusShardStrategy(dic, startingSize, lst);
return new LinearLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case EXPONENTIAL:
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst);
return new ExpGrowthLocusShardStrategy(dic, startingSize, lst, limitDataCount);
case INTERVAL:
case READS:
return new IntervalShardStrategy(startingSize, lst);

View File

@ -45,82 +45,69 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
*/
private static final int MAX_OUTSTANDING_OUTPUT_MERGES = 50;
/**
* Manage currently running threads.
*/
/** Manage currently running threads. */
private ExecutorService threadPool;
private Queue<Shard> traverseTasks = new LinkedList<Shard>();
private Queue<TreeReduceTask> reduceTasks = new LinkedList<TreeReduceTask>();
private Queue<OutputMerger> outputMergeTasks = new LinkedList<OutputMerger>();
/**
* How many total tasks were in the queue at the start of run.
*/
private int totalTraversals = 0;
/** How many total tasks were in the queue at the start of run. */
private int totalTraversals = 0;
/**
* How many shard traversals have run to date?
*/
/** How many shard traversals have run to date? */
private int totalCompletedTraversals = 0;
/**
* What is the total time spent traversing shards?
*/
/** What is the total time spent traversing shards? */
private long totalShardTraverseTime = 0;
/**
* What is the total time spent tree reducing shard output?
*/
/** What is the total time spent tree reducing shard output? */
private long totalTreeReduceTime = 0;
/**
* How many tree reduces have been completed?
*/
/** How many tree reduces have been completed? */
private long totalCompletedTreeReduces = 0;
/**
* What is the total time spent merging output?
*/
/** What is the total time spent merging output? */
private long totalOutputMergeTime = 0;
/**
* Create a new hierarchical microscheduler to process the given reads and reference.
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
*
* @param reads Reads file(s) to process.
* @param refFile Reference for driving the traversal.
* @param nThreadsToUse maximum number of threads to use to do the work
*/
protected HierarchicalMicroScheduler( Walker walker, Reads reads, File refFile, List<ReferenceOrderedData<? extends ReferenceOrderedDatum>> rods, int nThreadsToUse ) {
super( walker, reads, refFile, rods );
super(walker, reads, refFile, rods);
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
try {
MBeanServer mbs = ManagementFactory.getPlatformMBeanServer();
ObjectName name = new ObjectName("org.broadinstitute.sting.gatk.executive:type=HierarchicalMicroScheduler");
mbs.registerMBean(this,name);
mbs.registerMBean(this, name);
}
catch( JMException ex ) {
catch (JMException ex) {
throw new StingException("Unable to register microscheduler with JMX", ex);
}
}
public Object execute( Walker walker, GenomeLocSortedSet intervals ) {
public Object execute( Walker walker, GenomeLocSortedSet intervals, Integer maxIterations ) {
// Fast fail for walkers not supporting TreeReducible interface.
if( !(walker instanceof TreeReducible) )
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("Hierarchical microscheduler only works with TreeReducible walkers");
ShardStrategy shardStrategy = getShardStrategy( walker, reference, intervals );
ReduceTree reduceTree = new ReduceTree( this );
ShardStrategy shardStrategy = getShardStrategy(walker, reference, intervals, maxIterations);
ReduceTree reduceTree = new ReduceTree(this);
walker.initialize();
for(Shard shard: shardStrategy)
for (Shard shard : shardStrategy)
traverseTasks.add(shard);
totalTraversals = traverseTasks.size();
while( isShardTraversePending() || isTreeReducePending() ) {
while (isShardTraversePending() || isTreeReducePending()) {
// Too many files sitting around taking up space? Merge them.
if( isMergeLimitExceeded() )
if (isMergeLimitExceeded())
mergeExistingOutput();
// Wait for the next slot in the queue to become free.
@ -128,10 +115,10 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
// Pick the next most appropriate task and run it. In the interest of
// memory conservation, hierarchical reduces always run before traversals.
if( isTreeReduceReady() )
queueNextTreeReduce( walker );
else if( isShardTraversePending() )
queueNextShardTraverse( walker, reduceTree );
if (isTreeReduceReady())
queueNextTreeReduce(walker);
else if (isShardTraversePending())
queueNextShardTraverse(walker, reduceTree);
}
// Merge any lingering output files. If these files aren't ready,
@ -144,10 +131,10 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
try {
result = reduceTree.getResult().get();
}
catch(Exception ex) {
throw new StingException("Unable to retrieve result", ex );
catch (Exception ex) {
throw new StingException("Unable to retrieve result", ex);
}
traversalEngine.printOnTraversalDone(result);
walker.onTraversalDone(result);
@ -156,6 +143,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns true if there are unscheduled shard traversal waiting to run.
*
* @return true if a shard traversal is waiting; false otherwise.
*/
protected boolean isShardTraversePending() {
@ -165,10 +153,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns true if there are tree reduces that can be run without
* blocking.
*
* @return true if a tree reduce is ready; false otherwise.
*/
protected boolean isTreeReduceReady() {
if( reduceTasks.size() == 0 )
if (reduceTasks.size() == 0)
return false;
return reduceTasks.peek().isReadyForReduce();
}
@ -177,6 +166,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
* Returns true if there are tree reduces that need to be run before
* the computation is complete. Returns true if any entries are in the queue,
* blocked or otherwise.
*
* @return true if a tree reduce is pending; false otherwise.
*/
protected boolean isTreeReducePending() {
@ -186,17 +176,18 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns whether the maximum number of files is sitting in the temp directory
* waiting to be merged back in.
*
* @return True if the merging needs to take priority. False otherwise.
*/
protected boolean isMergeLimitExceeded() {
if( outputMergeTasks.size() < MAX_OUTSTANDING_OUTPUT_MERGES )
if (outputMergeTasks.size() < MAX_OUTSTANDING_OUTPUT_MERGES)
return false;
// If any of the first MAX_OUTSTANDING merges aren't ready, the merge limit
// has not been exceeded.
OutputMerger[] outputMergers = outputMergeTasks.toArray( new OutputMerger[0] );
for( int i = 0; i < MAX_OUTSTANDING_OUTPUT_MERGES; i++ ) {
if( !outputMergers[i].isComplete() )
OutputMerger[] outputMergers = outputMergeTasks.toArray(new OutputMerger[0]);
for (int i = 0; i < MAX_OUTSTANDING_OUTPUT_MERGES; i++) {
if (!outputMergers[i].isComplete())
return false;
}
@ -207,10 +198,11 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Returns whether there is output waiting to be merged into the global output
* streams right now.
*
* @return True if this output is ready to be merged. False otherwise.
*/
protected boolean isOutputMergeReady() {
if( outputMergeTasks.size() > 0 )
if (outputMergeTasks.size() > 0)
return outputMergeTasks.peek().isComplete();
else
return false;
@ -224,105 +216,99 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
long startTime = System.currentTimeMillis();
OutputTracker outputTracker = GenomeAnalysisEngine.instance.getOutputTracker();
while( isOutputMergeReady() )
outputMergeTasks.remove().mergeInto( outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream() );
while (isOutputMergeReady())
outputMergeTasks.remove().mergeInto(outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream());
long endTime = System.currentTimeMillis();
totalOutputMergeTime += (endTime - startTime);
totalOutputMergeTime += ( endTime - startTime );
}
/**
* Merge any output that hasn't yet been taken care of by the blocking thread.
*/
/** Merge any output that hasn't yet been taken care of by the blocking thread. */
protected void mergeRemainingOutput() {
long startTime = System.currentTimeMillis();
OutputTracker outputTracker = GenomeAnalysisEngine.instance.getOutputTracker();
while( outputMergeTasks.size() > 0 ) {
while (outputMergeTasks.size() > 0) {
OutputMerger outputMerger = outputMergeTasks.remove();
synchronized(outputMerger) {
if( !outputMerger.isComplete() )
synchronized (outputMerger) {
if (!outputMerger.isComplete())
outputMerger.waitForOutputComplete();
}
outputMerger.mergeInto( outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream() );
outputMerger.mergeInto(outputTracker.getGlobalOutStream(), outputTracker.getGlobalErrStream());
}
long endTime = System.currentTimeMillis();
totalOutputMergeTime += (endTime - startTime);
totalOutputMergeTime += ( endTime - startTime );
}
/**
* Queues the next traversal of a walker from the traversal tasks queue.
* @param walker Walker to apply to the dataset.
*
* @param walker Walker to apply to the dataset.
* @param reduceTree Tree of reduces to which to add this shard traverse.
*/
protected Future queueNextShardTraverse( Walker walker, ReduceTree reduceTree ) {
if( traverseTasks.size() == 0 )
throw new IllegalStateException( "Cannot traverse; no pending traversals exist.");
if (traverseTasks.size() == 0)
throw new IllegalStateException("Cannot traverse; no pending traversals exist.");
Shard shard = traverseTasks.remove();
OutputMerger outputMerger = new OutputMerger();
ShardTraverser traverser = new ShardTraverser( this,
getTraversalEngine(),
walker,
shard,
getShardDataProvider(shard),
outputMerger );
ShardTraverser traverser = new ShardTraverser(this,
getTraversalEngine(),
walker,
shard,
getShardDataProvider(shard),
outputMerger);
Future traverseResult = threadPool.submit(traverser);
// Add this traverse result to the reduce tree. The reduce tree will call a callback to throw its entries on the queue.
reduceTree.addEntry( traverseResult );
reduceTree.addEntry(traverseResult);
// No more data? Let the reduce tree know so it can finish processing what it's got.
if( !isShardTraversePending() )
if (!isShardTraversePending())
reduceTree.complete();
outputMergeTasks.add(outputMerger);
outputMergeTasks.add(outputMerger);
return traverseResult;
}
/**
* Pulls the next reduce from the queue and runs it.
*/
/** Pulls the next reduce from the queue and runs it. */
protected void queueNextTreeReduce( Walker walker ) {
if( reduceTasks.size() == 0 )
throw new IllegalStateException( "Cannot reduce; no pending reduces exist.");
if (reduceTasks.size() == 0)
throw new IllegalStateException("Cannot reduce; no pending reduces exist.");
TreeReduceTask reducer = reduceTasks.remove();
reducer.setWalker( (TreeReducible)walker );
reducer.setWalker((TreeReducible) walker);
threadPool.submit( reducer );
threadPool.submit(reducer);
}
/**
* Blocks until a free slot appears in the thread queue.
*/
/** Blocks until a free slot appears in the thread queue. */
protected void waitForFreeQueueSlot() {
ThreadPoolMonitor monitor = new ThreadPoolMonitor();
synchronized(monitor) {
threadPool.submit( monitor );
synchronized (monitor) {
threadPool.submit(monitor);
monitor.watch();
}
}
/**
* Callback for adding reduce tasks to the run queue.
*
* @return A new, composite future of the result of this reduce.
*/
public Future notifyReduce( Future lhs, Future rhs ) {
TreeReduceTask reducer = new TreeReduceTask( new TreeReducer( this, lhs, rhs ) );
TreeReduceTask reducer = new TreeReduceTask(new TreeReducer(this, lhs, rhs));
reduceTasks.add(reducer);
return reducer;
return reducer;
}
/**
* A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics.
*/
/** A small wrapper class that provides the TreeReducer interface along with the FutureTask semantics. */
private class TreeReduceTask extends FutureTask {
private TreeReducer treeReducer = null;
@ -332,7 +318,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
public void setWalker( TreeReducible walker ) {
treeReducer.setWalker( walker );
treeReducer.setWalker(walker);
}
public boolean isReadyForReduce() {
@ -342,6 +328,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Used by the ShardTraverser to report time consumed traversing a given shard.
*
* @param shardTraversalTime Elapsed time traversing a given shard.
*/
synchronized void reportShardTraverseTime( long shardTraversalTime ) {
@ -351,6 +338,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
/**
* Used by the TreeReducer to report time consumed reducing two shards.
*
* @param treeReduceTime Elapsed time reducing two shards.
*/
synchronized void reportTreeReduceTime( long treeReduceTime ) {
@ -359,69 +347,51 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getTotalNumberOfShards() {
return totalTraversals;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getRemainingNumberOfShards() {
return traverseTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getNumberOfTasksInReduceQueue() {
return reduceTasks.size();
return reduceTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public int getNumberOfTasksInIOQueue() {
return outputMergeTasks.size();
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalShardTraverseTimeMillis() {
return totalShardTraverseTime;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getAvgShardTraverseTimeMillis() {
if( totalCompletedTraversals == 0 )
if (totalCompletedTraversals == 0)
return 0;
return totalShardTraverseTime / totalCompletedTraversals;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalTreeReduceTimeMillis() {
return totalTreeReduceTime;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getAvgTreeReduceTimeMillis() {
if( totalCompletedTreeReduces == 0 )
if (totalCompletedTreeReduces == 0)
return 0;
return totalTreeReduceTime / totalCompletedTreeReduces;
}
/**
* {@inheritDoc}
*/
/** {@inheritDoc} */
public long getTotalOutputMergeTimeMillis() {
return totalOutputMergeTime;
}

View File

@ -31,9 +31,10 @@ public class LinearMicroScheduler extends MicroScheduler {
*
* @param walker Computation to perform over dataset.
* @param locations Subset of the dataset over which to walk.
* @param maxIterations the maximum number of iterations we're to perform
*/
public Object execute(Walker walker, GenomeLocSortedSet locations) {
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations);
public Object execute(Walker walker, GenomeLocSortedSet locations, Integer maxIterations) {
ShardStrategy shardStrategy = getShardStrategy(walker, reference, locations, maxIterations);
walker.initialize();
Accumulator accumulator = Accumulator.create(walker);

View File

@ -99,9 +99,10 @@ public abstract class MicroScheduler {
* Walks a walker over the given list of intervals.
* @param walker Computation to perform over dataset.
* @param intervals A list of intervals over which to walk. Null for whole dataset.
* @param maxIterations the maximum number of iterations we're to perform
* @return the return type of the walker
*/
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals);
public abstract Object execute(Walker walker, GenomeLocSortedSet intervals, Integer maxIterations);
/**
* Get the sharding strategy given a driving data source.
@ -110,7 +111,10 @@ public abstract class MicroScheduler {
* @param intervals Intervals to use when limiting sharding.
* @return Sharding strategy for this driving data source.
*/
protected ShardStrategy getShardStrategy(Walker walker, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
protected ShardStrategy getShardStrategy(Walker walker,
ReferenceSequenceFile drivingDataSource,
GenomeLocSortedSet intervals,
Integer maxIterations) {
ShardStrategy shardStrategy = null;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
if (walker instanceof LocusWalker) {
@ -122,11 +126,11 @@ public abstract class MicroScheduler {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals);
intervals, maxIterations);
} else
shardStrategy = ShardStrategyFactory.shatter(ShardStrategyFactory.SHATTER_STRATEGY.LINEAR,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE, maxIterations);
} else if (walker instanceof ReadWalker) {
@ -136,11 +140,11 @@ public abstract class MicroScheduler {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
intervals);
intervals, maxIterations);
} else {
shardStrategy = ShardStrategyFactory.shatter(shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE);
SHARD_SIZE, maxIterations);
}
} else
throw new StingException("Unable to support walker of type" + walker.getClass().getName());

View File

@ -73,7 +73,7 @@ public class GATKArgumentCollectionTest extends BaseTest {
List<File> input = new ArrayList<File>();
input.add(new File("test.file"));
collect.samFiles = input;
collect.maximumReads = "-1";
collect.maximumEngineIterations = -1;
collect.strictnessLevel = "strict";
collect.referenceFile = new File("referenceFile".toLowerCase());
collect.analysisName = "analysisName".toLowerCase();

View File

@ -58,7 +58,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testSetup() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
@ -71,7 +71,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testAdjustSize() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 500, -1);
strat.adjustNextShardSize(1000);
int counter = 0;
while(strat.hasNext()) {
@ -86,7 +86,7 @@ public class LinearLocusShardStrategyTest extends BaseTest {
@Test
public void testUnevenSplit() {
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600);
LinearLocusShardStrategy strat = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, -1);
int counter = 0;
while(strat.hasNext()) {
Shard d = strat.next();
@ -100,4 +100,19 @@ public class LinearLocusShardStrategyTest extends BaseTest {
}
assertTrue(counter == 10);
}
@Test
public void testDashMOption() {
    // Shard size 600 but -M limit 200: expect a single shard trimmed to 200 loci.
    LinearLocusShardStrategy strategy = new LinearLocusShardStrategy(header.getSequenceDictionary(), 600, 200);
    int shardsSeen = 0;
    while (strategy.hasNext()) {
        Shard shard = strategy.next();
        assertTrue(shard instanceof LocusShard);
        // Inclusive bounds: a 200-locus shard spans stop - start == 199.
        assertTrue(shard.getGenomeLoc().getStop() - shard.getGenomeLoc().getStart() == 199);
        ++shardsSeen;
    }
    assertTrue(shardsSeen == 1);
}
}

View File

@ -129,7 +129,7 @@ public class SAMBAMDataSourceTest extends BaseTest {
// setup the test files
fl.add(new File(seqLocation + "/dirseq/analysis/cancer_exome/twoflowcell_sams/TCGA-06-0188.aligned.duplicates_marked.bam"));
Reads reads = new Reads(fl);
Reads reads = new Reads(fl);
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();